Commit 3885c9bc authored by mei-ye's avatar mei-ye
Browse files

merge in develop

parent a5b0afa0
#ifndef MIGRAPHX_GUARD_RTGLIB_PRE_SCHEDULING_IMPL_HPP
#define MIGRAPHX_GUARD_RTGLIB_PRE_SCHEDULING_IMPL_HPP
#include <migraphx/common_header.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/insert_instruction.hpp>
namespace migraphx {
/// One node of the scheduling DAG, wrapping a single program instruction.
struct dag_node
{
    // All scalar fields take their in-class defaults; `ins` is left
    // default-initialized exactly as before.
    dag_node() {}
    // Weight assigned to this node's instruction.
    int weight = 0;
    // Non-zero when the instruction must not be placed on a stream
    // (see can_use_stream()).
    int run_on_cpu = 0;
    // Accumulated weight used by the ordering comparators.
    int weight_sum = 0;
    // Index of the instruction; -1 until assigned.
    int ins_ndx = -1;
    // First child in the DAG, or nullptr when none is recorded.
    dag_node* first_child = nullptr;
    // Assigned stream, partition and cycle bookkeeping; -1 means "unset".
    int stream         = -1;
    int partition      = -1;
    int sched_cycle    = -1;
    int earliest_cycle = -1;
    // The instruction this node stands for.
    instruction_ref ins;
    /// True when the wrapped instruction is named "@literal".
    bool is_literal() const { return ins->name() == "@literal"; }
    /// True when the node is not flagged as CPU-only.
    bool can_use_stream() const { return run_on_cpu == 0; }
#ifdef MIGRAPHX_DEBUG_OPT
    void dump();
#endif
};
/// Tracks how many partitions exist and the summed node weight of each one.
struct dag_partition
{
    dag_partition() : num_of_partition(0) {}
    /// Appends a new partition with zero weight and returns its index.
    int create_partition()
    {
        const int new_id = num_of_partition;
        weight_sum.push_back(0);
        ++num_of_partition;
        return new_id;
    }
    /// Adds the node's weight to its partition; nodes with a negative
    /// partition index are ignored.
    void add_weight(dag_node* node)
    {
        const int id = node->partition;
        if(id < 0)
            return;
        assert(id < num_of_partition);
        weight_sum[id] += node->weight;
    }
    // Number of partitions created so far.
    int num_of_partition;
    // weight_sum[i] is the accumulated weight of partition i.
    std::vector<int> weight_sum;
};
/// Per-stream scheduling state: one "next cycle" counter per stream plus the
/// largest cycle seen.
struct stream_info
{
    /// Creates `n` counters, all zero. A non-positive `n` yields no counters.
    stream_info(int n) : num_of_streams(n), max_cycle(0)
    {
        if(n > 0)
            next_cycles.assign(static_cast<std::size_t>(n), 0);
    }
    // next_cycles[s] is the next cycle recorded for stream s.
    std::vector<int> next_cycles;
    int num_of_streams;
    int max_cycle;
};
/// State and helpers behind the pre-scheduling pass.
struct pre_scheduling_impl
{
    /// Captures the program, the weight callback, the stream count, the
    /// instruction-insertion interface and the verification flag. All lookup
    /// maps start out empty.
    pre_scheduling_impl(program* p,
                        std::function<std::pair<int, int>(const operation&)> w,
                        int n,
                        insert_instruction ins,
                        bool v)
        : p_program(p),
          weight_func(std::move(w)),
          num_of_streams(n),
          insert_instr(std::move(ins)),
          enable_verify(v)
    {
    }
    void schedule(std::list<dag_node*>&);
    void compute_weights();
    int get_stream(stream_info&, dag_node*);
    void record(stream_info&, dag_node*);
    void reorder();
    void run();
    void splice(std::list<dag_node*>&);
    void annotate(std::list<dag_node*>&);
    /// Orders exit nodes by descending weight_sum.
    static bool compare_exit_nodes(dag_node* d1, dag_node* d2)
    {
        return d1->weight_sum > d2->weight_sum;
    }
    /// Queue comparator: the node with the smaller weight_sum is placed on
    /// top of the queue; ties go to the smaller instruction index.
    struct weighted_topology_ordering
    {
        bool operator()(const dag_node* d1, const dag_node* d2) const
        {
            if(d1->weight_sum != d2->weight_sum)
                return d1->weight_sum > d2->weight_sum;
            return d1->ins_ndx > d2->ins_ndx;
        }
    };
    /// Queue comparator: smaller sched_cycle on top, then smaller stream,
    /// then smaller instruction index.
    struct post_schedule_ordering
    {
        bool operator()(const dag_node* d1, const dag_node* d2) const
        {
            if(d1->sched_cycle != d2->sched_cycle)
                return d1->sched_cycle > d2->sched_cycle;
            if(d1->stream != d2->stream)
                return d1->stream > d2->stream;
            return d1->ins_ndx > d2->ins_ndx;
        }
    };
    /// True when bit `m` is set in the mask stored for `ins`; false when the
    /// instruction has no mask entry.
    bool has_mask(instruction_ref ins, unsigned int m)
    {
        const auto entry = instr2_mask.find(ins);
        if(entry == instr2_mask.end())
            return false;
        return (entry->second & (1u << m)) != 0;
    }
    /// Sets bit `m` in the mask stored for `ins`, creating the entry when it
    /// does not exist yet.
    void add_mask(instruction_ref ins, unsigned int m)
    {
        const unsigned int bit = 1u << m;
        const auto entry       = instr2_mask.find(ins);
        if(entry == instr2_mask.end())
            instr2_mask[ins] = bit;
        else if((entry->second & bit) == 0)
            entry->second |= bit;
    }
    void verify();
#ifdef MIGRAPHX_DEBUG_OPT
    void dump(const std::string&);
    void dump_program();
    void dump(std::list<dag_node*>&);
#endif
    static const int min_partition_threshold = 2;

    private:
    program* p_program;
    // Maps an operation to a pair of ints; consumed by the out-of-line
    // methods declared above.
    std::function<std::pair<int, int>(const operation&)> weight_func;
    int num_of_streams;
    insert_instruction insert_instr;
    std::vector<dag_node> nodes;
    std::vector<dag_node*> exit_nodes;
    std::unordered_map<instruction_ref, dag_node*> instr2_node;
    std::unordered_map<instruction_ref, int> instr2_stream;
    std::unordered_map<instruction_ref, unsigned int> instr2_mask;
    dag_partition partition_info;
    bool enable_verify;
};
} // namespace migraphx
#endif
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <migraphx/ranges.hpp> #include <migraphx/ranges.hpp>
#include <migraphx/time.hpp> #include <migraphx/time.hpp>
#include <migraphx/iterator_for.hpp> #include <migraphx/iterator_for.hpp>
#include <migraphx/pass_config.hpp>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <algorithm> #include <algorithm>
...@@ -52,7 +53,12 @@ static void print_instruction(std::ostream& os, ...@@ -52,7 +53,12 @@ static void print_instruction(std::ostream& os,
} }
os << ")"; os << ")";
} }
if(ins->get_stream() >= 0)
os << "(stream=" << ins->get_stream() << ")";
if(ins->has_mask(wait_event))
os << " wait ";
if(ins->has_mask(record_event))
os << " record=" << ins->get_event();
os << " -> " << ins->get_shape(); os << " -> " << ins->get_shape();
} }
...@@ -323,12 +329,18 @@ void program::compile(const target& t, tracer trace) ...@@ -323,12 +329,18 @@ void program::compile(const target& t, tracer trace)
void program::finalize() void program::finalize()
{ {
int max_event = -1;
for(auto ins : iterator_for(*this)) for(auto ins : iterator_for(*this))
{ {
ins->finalize(this->impl->ctx); ins->finalize(this->impl->ctx);
max_event = std::max(max_event, ins->get_event());
} }
if(max_event >= 0)
this->impl->ctx.create_events(max_event + 1);
} }
// Forwards to the finish() of the context owned by this program.
void program::finish()
{
    auto& context = this->impl->ctx;
    context.finish();
}
template <class F> template <class F>
argument generic_eval(const program& p, argument generic_eval(const program& p,
context& ctx, context& ctx,
...@@ -340,8 +352,12 @@ argument generic_eval(const program& p, ...@@ -340,8 +352,12 @@ argument generic_eval(const program& p,
results.reserve(p.size() * 2); results.reserve(p.size() * 2);
std::vector<argument> values; std::vector<argument> values;
values.reserve(16); values.reserve(16);
bool enable_event_as_instr = enabled(MIGRAPHX_ENABLE_EVENT_AS_INSTRUCTION{});
for(auto ins : iterator_for(p)) for(auto ins : iterator_for(p))
{ {
int stream = ins->get_stream();
// ctx.set_stream(stream);
if(ins->name() == "@literal") if(ins->name() == "@literal")
{ {
results.emplace(ins, trace(ins, [&] { return ins->get_literal().get_argument(); })); results.emplace(ins, trace(ins, [&] { return ins->get_literal().get_argument(); }));
...@@ -368,9 +384,26 @@ argument generic_eval(const program& p, ...@@ -368,9 +384,26 @@ argument generic_eval(const program& p,
assert(results.find(i) != results.end()); assert(results.find(i) != results.end());
return results[i]; return results[i];
}); });
if(!enable_event_as_instr && ins->has_mask(wait_event))
{
for(auto&& arg : ins->inputs())
{
int arg_s = arg->get_stream();
if((arg_s < 0) || (arg_s == stream))
continue;
int event = arg->get_event();
assert(event >= 0);
ctx.wait_event(event);
}
}
results.emplace(ins, trace(ins, [&] { results.emplace(ins, trace(ins, [&] {
return ins->get_operator().compute(ctx, ins->get_shape(), values); return ins->get_operator().compute(ctx, ins->get_shape(), values);
})); }));
if(!enable_event_as_instr && ins->has_mask(record_event))
ctx.record_event(ins->get_event());
} }
assert(results.find(ins) != results.end()); assert(results.find(ins) != results.end());
} }
...@@ -534,6 +567,5 @@ std::ostream& operator<<(std::ostream& os, const program& p) ...@@ -534,6 +567,5 @@ std::ostream& operator<<(std::ostream& os, const program& p)
print_program(os, p, [](auto&&...) {}); print_program(os, p, [](auto&&...) {});
return os; return os;
} }
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
...@@ -9,9 +9,12 @@ namespace cpu { ...@@ -9,9 +9,12 @@ namespace cpu {
struct context struct context
{ {
void finish() const {} void finish() {}
void set_stream(int) {}
void create_events(int) {}
void record_event(int) {}
void wait_event(int) {}
}; };
} // namespace cpu } // namespace cpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <migraphx/gpu/miopen.hpp> #include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/rocblas.hpp> #include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/hip.hpp> #include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/machine_model.hpp>
#include <migraphx/env.hpp> #include <migraphx/env.hpp>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
...@@ -11,13 +12,13 @@ namespace migraphx { ...@@ -11,13 +12,13 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_NULL_STREAM)
struct hip_device struct hip_device
{ {
hip_device() { add_stream(); } using hip_event_ptr = MIGRAPHX_MANAGE_PTR(hipEvent_t, hipEventDestroy);
hip_device() { add_streams(); }
hip_device(std::size_t id) : device_id(id) { add_stream(); } hip_device(std::size_t id) : device_id(id) { add_streams(); }
struct stream struct stream
{ {
...@@ -32,7 +33,8 @@ struct hip_device ...@@ -32,7 +33,8 @@ struct hip_device
static hip_stream_ptr create_stream() static hip_stream_ptr create_stream()
{ {
hipStream_t result = nullptr; hipStream_t result = nullptr;
auto status = hipStreamCreate(&result); auto status = hipStreamCreateWithFlags(&result, hipStreamNonBlocking);
if(status != hipSuccess) if(status != hipSuccess)
MIGRAPHX_THROW("Failed to allocate stream"); MIGRAPHX_THROW("Failed to allocate stream");
return hip_stream_ptr{result}; return hip_stream_ptr{result};
...@@ -84,16 +86,61 @@ struct hip_device ...@@ -84,16 +86,61 @@ struct hip_device
shared<rocblas_handle_ptr> rbhandle = nullptr; shared<rocblas_handle_ptr> rbhandle = nullptr;
}; };
void add_stream() { streams.emplace_back(device_id); }
// Creates a HIP event with timing disabled and wraps it in the owning
// hip_event_ptr. Throws when the runtime reports a failure.
static hip_event_ptr create_event()
{
    // Start from a null handle, matching create_stream(), so a failed call
    // never leaves an indeterminate value behind.
    hipEvent_t event = nullptr;
    auto status      = hipEventCreateWithFlags(&event, hipEventDisableTiming);
    if(status != hipSuccess)
        MIGRAPHX_THROW("Failed to create event");
    return hip_event_ptr{event};
}
// Fills the (still empty) stream list: a single stream by default, or the
// count reported by stream_info when MIGRAPHX_DISABLE_NULL_STREAM is enabled.
void add_streams()
{
    assert(streams.empty());
    const bool multi_stream = enabled(MIGRAPHX_DISABLE_NULL_STREAM{});
    const int wanted        = multi_stream ? stream_info().num_of_streams() : 1;
    for(int created = 0; created < wanted; ++created)
        streams.emplace_back(device_id);
}
stream& get_stream() { return streams.at(current_stream); } stream& get_stream() { return streams.at(current_stream); }
void set_stream(std::size_t n) { current_stream = n; } void set_stream(std::size_t n) { current_stream = n; }
// Grows the event list until it holds at least `num_of_events` events;
// existing events are kept.
void create_events(int num_of_events)
{
    int existing = events.size();
    while(existing < num_of_events)
    {
        events.emplace_back(create_event());
        ++existing;
    }
}
void record_event(int event)
{
hipEventRecord(events.at(event).get(), streams.at(current_stream).get());
}
void wait_event(int event)
{
hipStreamWaitEvent(streams.at(current_stream).get(), events.at(event).get(), 0);
}
// Synchronizes every stream of this device, but only when
// MIGRAPHX_DISABLE_NULL_STREAM is enabled; otherwise does nothing.
void stream_sync()
{
    if(!enabled(MIGRAPHX_DISABLE_NULL_STREAM{}))
        return;
    for(auto& s : streams)
        hipStreamSynchronize(s.get());
}
private: private:
std::size_t device_id = 0; std::size_t device_id = 0;
std::size_t current_stream = 0; std::size_t current_stream = 0;
std::vector<stream> streams; std::vector<stream> streams;
std::vector<shared<hip_event_ptr>> events;
}; };
struct context struct context
...@@ -107,9 +154,21 @@ struct context ...@@ -107,9 +154,21 @@ struct context
} }
hip_device::stream& get_stream() { return get_current_device().get_stream(); } hip_device::stream& get_stream() { return get_current_device().get_stream(); }
// Selects stream `n` on the current device; negative indices are ignored.
void set_stream(int n)
{
    if(n < 0)
        return;
    get_current_device().set_stream(n);
}
// Event operations are forwarded unchanged to the current device.
void create_events(int num_of_events)
{
    auto& device = get_current_device();
    device.create_events(num_of_events);
}
void record_event(int event)
{
    auto& device = get_current_device();
    device.record_event(event);
}
void wait_event(int event)
{
    auto& device = get_current_device();
    device.wait_event(event);
}
std::vector<argument> literals{}; std::vector<argument> literals{};
void finish() const { gpu_sync(); } void finish()
{
get_current_device().stream_sync();
gpu_sync();
}
private: private:
// TODO: Make this a vector to support multiple devices // TODO: Make this a vector to support multiple devices
......
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_EVENT_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_EVENT_HPP
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
/// Operation that asks the context to create a given number of events.
struct create_events
{
    // Number of events requested from the context.
    int num_of_events = 0;
    // NOTE(review): the reflected field is labelled "event" although it holds
    // num_of_events -- confirm the label is intentional.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.num_of_events, "event"));
    }
    std::string name() const { return "gpu::create_events"; }
    /// Produces an empty shape regardless of the inputs.
    shape compute_shape(const std::vector<shape>&) const { return shape{}; }
    /// Calls ctx.create_events(num_of_events) and returns an empty argument.
    argument compute(context& ctx, const shape&, const std::vector<argument>&) const
    {
        ctx.create_events(num_of_events);
        return argument{};
    }
};
/// Operation that asks the context to record a numbered event.
struct record_event
{
    // Index of the event to record; defaults to -1.
    int event = -1;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.event, "event"));
    }
    std::string name() const { return "gpu::record_event"; }
    /// Produces an empty shape regardless of the inputs.
    shape compute_shape(const std::vector<shape>&) const { return shape{}; }
    /// Calls ctx.record_event(event) and returns an empty argument.
    argument compute(context& ctx, const shape&, const std::vector<argument>&) const
    {
        ctx.record_event(event);
        return argument{};
    }
};
/// Operation that asks the context to wait on a numbered event.
struct wait_event
{
    // Index of the event to wait for; defaults to -1.
    int event = -1;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.event, "event"));
    }
    std::string name() const { return "gpu::wait_event"; }
    /// Produces an empty shape regardless of the inputs.
    shape compute_shape(const std::vector<shape>&) const { return shape{}; }
    /// Calls ctx.wait_event(event) and returns an empty argument.
    argument compute(context& ctx, const shape&, const std::vector<argument>&) const
    {
        ctx.wait_event(event);
        return argument{};
    }
};
/// Operation that selects the stream used by the context.
struct set_stream
{
    // Index of the stream to select; defaults to -1.
    int stream = -1;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.stream, "stream"));
    }
    std::string name() const { return "gpu::set_stream"; }
    /// Produces an empty shape regardless of the inputs.
    shape compute_shape(const std::vector<shape>&) const { return shape{}; }
    /// Selects `stream` on the context (asserting it is non-negative) and
    /// returns an empty argument.
    argument compute(context& ctx, const shape&, const std::vector<argument>&) const
    {
        assert(stream >= 0);
        ctx.set_stream(stream);
        return argument{};
    }
    /// Also selects `stream` on the context when the instruction is finalized.
    void finalize(context& ctx, const shape&, const std::vector<shape>&)
    {
        ctx.set_stream(stream);
    }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_FIND_CONCUR_GPU_HPP
#define MIGRAPHX_GUARD_RTGLIB_FIND_CONCUR_GPU_HPP
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/config.hpp>
#include <migraphx/dom_info.hpp>
#include <migraphx/common_header.hpp>
#include <unordered_map>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
/// GPU implementation of the find_concur interface, built on dom_info.
struct find_concur_gpu
{
    /// Runs dom_info over `p` (compute_dom(true) followed by
    /// propagate_splits) and lets it fill `concur_instrs` and `instr2_points`.
    void get_concur(program* p,
                    int num_of_streams,
                    std::unordered_map<const instruction*,
                                       std::vector<std::vector<const instruction*>>>& concur_instrs,
                    std::unordered_map<const instruction*, int>& instr2_points) const
    {
        dom_info analysis(p);
        analysis.compute_dom(true);
        analysis.propagate_splits(num_of_streams, concur_instrs, instr2_points);
    }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_INSERT_INSTRUCTION_GPU_HPP
#define MIGRAPHX_GUARD_RTGLIB_INSERT_INSTRUCTION_GPU_HPP
#include <migraphx/instruction_ref.hpp>
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/event.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct insert_instruction_gpu
{
void insert_create_events(program* p, instruction_ref ins, int num_of_events)
{
p->insert_instruction(ins, create_events{num_of_events});
}
void insert_record_event(program* p, instruction_ref ins, int event)
{
p->insert_instruction(ins, record_event{event});
}
void insert_wait_event(program* p, instruction_ref ins, int event)
{
p->insert_instruction(ins, wait_event{event});
}
void insert_stream(program* p, instruction_ref ins, int stream)
{
p->insert_instruction(ins, set_stream{stream});
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_MACHINE_MODEL_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_MACHINE_MODEL_HPP
#include <string>
#include <unordered_map>
#include <migraphx/pass_config.hpp>
#include <migraphx/operation.hpp>
namespace migraphx {
namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_NULL_STREAM)
/// Lookup table from operation name to a (weight, cpu-only) pair.
struct op_info
{
    op_info()
    {
        // The first number is the weight. The second is non-zero when the
        // instruction is run ONLY on CPU.
        struct entry
        {
            const char* name;
            int weight;
            int cpu_only;
        };
        const entry table[] = {
            {"convolution", 4, 0},
            {"pooling", 2, 0},
            {"gemm", 2, 0},
            {"broadcast", 1, 1},
            {"multibroadcast", 1, 1},
            {"contiguous", 1, 1},
            {"transpose", 1, 1},
            {"load", 1, 1},
            {"@param", 1, 1},
            {"@literal", 1, 1},
            {"hip::load_literal", 1, 1},
            {"hip::allocate", 0, 1},
            {"@outline", 0, 1},
            {"gpu::convolution", 4, 0},
            {"gpu::conv_bias_relu", 4, 0},
            {"gpu::pooling", 2, 0},
            {"gpu::gemm", 2, 0},
            {"gpu::concat", 1, 0},
            {"hip::add_relu", 2, 0},
        };
        // Insert in table order so the map is built exactly as before.
        for(const auto& e : table)
            weight_map[e.name] = std::make_pair(e.weight, e.cpu_only);
    }
    /// Returns the table entry for the operation's name, or (1, 0) when the
    /// name is not listed.
    std::pair<int, int> operator()(const operation& op)
    {
        const auto found = weight_map.find(op.name());
        if(found == weight_map.end())
            return std::make_pair(1, 0);
        return found->second;
    }
    std::unordered_map<std::string, std::pair<int, int>> weight_map;
};
/// Reports how many streams the GPU target schedules onto.
struct stream_info
{
    /// 4 when MIGRAPHX_DISABLE_NULL_STREAM is enabled, otherwise 0.
    int num_of_streams()
    {
        const bool multi_stream = enabled(MIGRAPHX_DISABLE_NULL_STREAM{});
        return multi_stream ? 4 : 0;
    }
};
} // namespace gpu
} // namespace migraphx
#endif
...@@ -18,6 +18,10 @@ ...@@ -18,6 +18,10 @@
#include <migraphx/rewrite_rnn.hpp> #include <migraphx/rewrite_rnn.hpp>
#include <migraphx/eliminate_concat.hpp> #include <migraphx/eliminate_concat.hpp>
#include <migraphx/gpu/concat_gpu_opt.hpp> #include <migraphx/gpu/concat_gpu_opt.hpp>
#include <migraphx/pre_scheduling.hpp>
#include <migraphx/gpu/machine_model.hpp>
#include <migraphx/gpu/find_concur_gpu.hpp>
#include <migraphx/gpu/insert_instruction_gpu.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -26,6 +30,8 @@ namespace gpu { ...@@ -26,6 +30,8 @@ namespace gpu {
std::vector<pass> target::get_passes(migraphx::context& gctx) const std::vector<pass> target::get_passes(migraphx::context& gctx) const
{ {
auto& ctx = any_cast<context>(gctx); auto& ctx = any_cast<context>(gctx);
std::function<std::pair<int, int>(const operation&)> weight_func = op_info();
int num_of_streams = stream_info().num_of_streams();
// clang-format off // clang-format off
return return
{ {
...@@ -51,7 +57,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const ...@@ -51,7 +57,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const
fuse_ops{&ctx}, fuse_ops{&ctx},
dead_code_elimination{}, dead_code_elimination{},
write_literals{&ctx}, write_literals{&ctx},
memory_coloring{"hip::allocate"}, pre_scheduling{weight_func, num_of_streams, insert_instruction_gpu{}},
memory_coloring{"hip::allocate", num_of_streams, find_concur_gpu{}},
eliminate_workspace{}, eliminate_workspace{},
eliminate_allocation{"hip::allocate"}, eliminate_allocation{"hip::allocate"},
check_context<context>{}, check_context<context>{},
......
...@@ -47,6 +47,10 @@ struct non_computable_cf ...@@ -47,6 +47,10 @@ struct non_computable_cf
struct test_context struct test_context
{ {
void finish() const {} void finish() const {}
void set_stream(int) {}
void create_events(int) {}
void record_event(int) {}
void wait_event(int) {}
}; };
TEST_CASE(literal_test) TEST_CASE(literal_test)
......
#include <migraphx/pre_scheduling.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
#include <migraphx/cpu/target.hpp>
#include <basic_ops.hpp>
#include <test.hpp>
// This test exercises the stream/event hooks of the CPU context
// (cpu's context.hpp) and the event handling done at evaluation time
// in program.cpp.
TEST_CASE(test1)
{
migraphx::program p;
auto in1 = p.add_parameter("0", migraphx::shape{migraphx::shape::float_type, {32, 64, 1, 1}});
auto in2 = p.add_parameter("1", migraphx::shape{migraphx::shape::float_type, {64, 64, 1, 1}});
// First convolution is placed on stream 0.
auto p1 = p.add_instruction(migraphx::op::convolution{}, in1, in2);
p1->set_stream(0);
auto in3 = p.add_parameter("2", migraphx::shape{migraphx::shape::float_type, {64, 64, 1, 1}});
// Second convolution is placed on stream 1 and flagged to record event 0.
auto p2 = p.add_instruction(migraphx::op::convolution{}, in1, in3);
p2->set_stream(1);
p2->set_event(0);
p2->add_mask(migraphx::record_event);
// The concat consumes both convolutions on stream 0 and is flagged to wait.
auto p3 = p.add_instruction(migraphx::op::concat{1}, p1, p2);
p3->set_stream(0);
p3->add_mask(migraphx::wait_event);
p.compile(migraphx::cpu::target{});
// Generate an argument for every parameter and evaluate; the test only
// requires that evaluation completes.
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
m[x.first] = migraphx::generate_argument(x.second);
}
p.eval(m);
}
// Entry point: hands the command-line arguments to the test runner.
int main(int argc, const char* argv[])
{
    test::run(argc, argv);
}
...@@ -11,6 +11,10 @@ struct id_target ...@@ -11,6 +11,10 @@ struct id_target
struct context struct context
{ {
void finish() const {} void finish() const {}
void set_stream(int) {}
void create_events(int) {}
void record_event(int) {}
void wait_event(int) {}
}; };
migraphx::context ctx = context{}; migraphx::context ctx = context{};
std::string name() const { return "id"; } std::string name() const { return "id"; }
......
#include <test.hpp>
#include <basic_ops.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/verify_args.hpp>
// Builds two convolutions sharing one input and concatenates their results.
// When `is_cpu` is true the second convolution is flagged to record event 0
// and the concat is flagged to wait.
migraphx::program create_program(bool is_cpu)
{
    const auto float_type = migraphx::shape::float_type;
    migraphx::program prog;
    auto data     = prog.add_parameter("0", migraphx::shape{float_type, {32, 64, 1, 1}});
    auto weights1 = prog.add_parameter("1", migraphx::shape{float_type, {64, 64, 1, 1}});
    auto conv1    = prog.add_instruction(migraphx::op::convolution{}, data, weights1);
    auto weights2 = prog.add_parameter("2", migraphx::shape{float_type, {64, 64, 1, 1}});
    auto conv2    = prog.add_instruction(migraphx::op::convolution{}, data, weights2);
    if(is_cpu)
    {
        conv2->set_event(0);
        conv2->add_mask(migraphx::record_event);
    }
    auto joined = prog.add_instruction(migraphx::op::concat{1}, conv1, conv2);
    if(is_cpu)
        joined->add_mask(migraphx::wait_event);
    return prog;
}
// Compiles the unannotated program for the GPU target with
// MIGRAPHX_DISABLE_NULL_STREAM=1, evaluates it on generated inputs and
// returns the result copied back from the GPU.
migraphx::argument run_gpu()
{
    setenv("MIGRAPHX_DISABLE_NULL_STREAM", "1", 1);
    migraphx::program prog = create_program(false);
    prog.compile(migraphx::gpu::target{});
    migraphx::program::parameter_map params;
    for(auto&& param : prog.get_parameter_shapes())
    {
        params[param.first] = migraphx::gpu::to_gpu(migraphx::generate_argument(param.second));
    }
    auto host_result = migraphx::gpu::from_gpu(prog.eval(params));
    prog.finish();
    return host_result;
}
// Compiles the event-annotated program for the CPU target and evaluates it
// on generated inputs.
migraphx::argument run_cpu()
{
    migraphx::program prog = create_program(true);
    prog.compile(migraphx::cpu::target{});
    migraphx::program::parameter_map params;
    for(auto&& param : prog.get_parameter_shapes())
    {
        params[param.first] = migraphx::generate_argument(param.second);
    }
    return prog.eval(params);
}
// Runs the program on the GPU first, then on the CPU, and passes both
// results to verify_args (CPU result first).
void gpu_stream_execution_test()
{
    auto gpu_result = run_gpu();
    auto cpu_result = run_cpu();
    verify_args("test", cpu_result, gpu_result);
}
// Entry point: runs the single GPU/CPU comparison test.
int main()
{
    gpu_stream_execution_test();
}
...@@ -2,15 +2,47 @@ ...@@ -2,15 +2,47 @@
#include <migraphx/operators.hpp> #include <migraphx/operators.hpp>
#include <migraphx/generate.hpp> #include <migraphx/generate.hpp>
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/dom_info.hpp>
#include <migraphx/common_header.hpp>
#include <migraphx/instruction.hpp>
#include <basic_ops.hpp> #include <basic_ops.hpp>
#include <test.hpp> #include <test.hpp>
// Test stand-in for the stream-selection operation; it is named
// "gpu::set_stream" and has no compute method.
struct set_stream
{
    int stream = -1;
    std::string name() const { return "gpu::set_stream"; }
    // Yields the first input's shape, or an empty shape without inputs.
    migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
    {
        return inputs.empty() ? migraphx::shape{} : inputs.front();
    }
};
struct find_concur
{
void get_concur(
migraphx::program* p,
int num_of_streams,
std::unordered_map<const migraphx::instruction*,
std::vector<std::vector<const migraphx::instruction*>>>& concur_instrs,
std::unordered_map<const migraphx::instruction*, int>& instr2_points) const
{
migraphx::dom_info info(p);
info.compute_dom(true);
info.propagate_splits(num_of_streams, concur_instrs, instr2_points);
}
};
struct memory_coloring_target struct memory_coloring_target
{ {
std::string name() const { return "memory_coloring"; } std::string name() const { return "memory_coloring"; }
std::vector<migraphx::pass> get_passes(migraphx::context&) const std::vector<migraphx::pass> get_passes(migraphx::context&) const
{ {
return {migraphx::memory_coloring{"allocate", true}}; return {migraphx::memory_coloring{"allocate", 4, find_concur{}, true}};
} }
migraphx::context get_context() const { return {}; } migraphx::context get_context() const { return {}; }
}; };
...@@ -608,4 +640,47 @@ TEST_CASE(literal_test) ...@@ -608,4 +640,47 @@ TEST_CASE(literal_test)
CHECK(lit == result); CHECK(lit == result);
} }
// Builds a program whose instructions are spread over streams 0, 1 and 2
// with record/wait masks, runs memory coloring on it, and checks the size
// of the resulting "scratch" parameter.
TEST_CASE(concurrent_test)
{
migraphx::program p;
auto in = p.add_parameter("0", migraphx::shape{migraphx::shape::float_type, {40}});
// p1 runs on stream 0 and is flagged to record an event; p2, p3 and p6
// all consume it.
auto a1 = add_alloc(p, {migraphx::shape::float_type, {40}});
auto p1 = p.add_instruction(pass_op{}, a1, in);
p.insert_instruction(p1, set_stream{0});
p1->set_stream(0);
p1->add_mask(migraphx::record_event);
// Stream 0 chain: p1 -> p2 -> p4.
auto a2 = add_alloc(p, {migraphx::shape::float_type, {40}});
auto p2 = p.add_instruction(pass_op{}, a2, p1);
p2->set_stream(0);
auto a4 = add_alloc(p, {migraphx::shape::float_type, {40}});
auto p4 = p.add_instruction(pass_op{}, a4, p2);
p4->set_stream(0);
// Stream 1 chain: p3 waits, p5 records.
auto a3 = add_alloc(p, {migraphx::shape::float_type, {40}});
auto p3 = p.add_instruction(pass_op{}, a3, p1);
p3->set_stream(1);
p.insert_instruction(p3, set_stream{1});
p3->add_mask(migraphx::wait_event);
auto a5 = add_alloc(p, {migraphx::shape::float_type, {40}});
auto p5 = p.add_instruction(pass_op{}, a5, p3);
p5->set_stream(1);
p5->add_mask(migraphx::record_event);
// Stream 2 chain: p6 waits, p7 records.
auto a6 = add_alloc(p, {migraphx::shape::float_type, {40}});
auto p6 = p.add_instruction(pass_op{}, a6, p1);
p6->set_stream(2);
p6->add_mask(migraphx::wait_event);
p.insert_instruction(p6, set_stream{2});
auto a7 = add_alloc(p, {migraphx::shape::float_type, {40}});
auto p7 = p.add_instruction(pass_op{}, a7, p6);
p7->set_stream(2);
p7->add_mask(migraphx::record_event);
// The concat on stream 0 joins the ends of the three chains and waits.
auto a8 = add_alloc(p, {migraphx::shape::float_type, {40}});
auto p8 = p.add_instruction(migraphx::op::concat{0}, a8, p4, p5, p7);
;
p8->set_stream(0);
p8->add_mask(migraphx::wait_event);
p.insert_instruction(p8, set_stream{0});
p.compile(memory_coloring_target{});
// 960 bytes equals six 40-element buffers, assuming 4-byte floats.
CHECK(p.get_parameter_shape("scratch").bytes() == 960);
}
int main(int argc, const char* argv[]) { test::run(argc, argv); } int main(int argc, const char* argv[]) { test::run(argc, argv); }
#include <migraphx/pre_scheduling.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
#include <basic_ops.hpp>
#include <test.hpp>
// Test stand-in for the stream-selection operation; it is named
// "set_stream" and has no compute method.
struct set_stream
{
    int stream = -1;
    std::string name() const { return "set_stream"; }
    // Yields the first input's shape, or an empty shape without inputs.
    migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
    {
        return inputs.empty() ? migraphx::shape{} : inputs.front();
    }
};
// Test stand-in for the event-creation operation; it is named
// "gpu::create_events" and has no compute method.
struct create_events
{
    int num_of_events = 0;
    std::string name() const { return "gpu::create_events"; }
    // Yields the first input's shape, or an empty shape without inputs.
    migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
    {
        return inputs.empty() ? migraphx::shape{} : inputs.front();
    }
};
// Weight callback for the test: "@param" and "@literal" map to (1, 1),
// every other operation name to (1, 0).
struct weight_func
{
    weight_func()
    {
        weight_map["@param"]   = std::make_pair(1, 1);
        weight_map["@literal"] = std::make_pair(1, 1);
    }
    std::pair<int, int> operator()(const migraphx::operation& op)
    {
        const auto found = weight_map.find(op.name());
        if(found == weight_map.end())
            return std::make_pair(1, 0);
        return found->second;
    }
    std::unordered_map<std::string, std::pair<int, int>> weight_map;
};
struct insert_instruction
{
void insert_stream(migraphx::program* p, migraphx::instruction_ref ins, int stream)
{
p->insert_instruction(ins, set_stream{stream});
}
void insert_create_events(migraphx::program*, migraphx::instruction_ref, int) {}
void insert_record_event(migraphx::program*, migraphx::instruction_ref, int) {}
void insert_wait_event(migraphx::program*, migraphx::instruction_ref, int) {}
};
// Test target whose only pass is pre_scheduling, configured with the test
// weight callback, 2 streams, the test insertion interface and `true` as
// the final argument.
struct stream_execution_target
{
// Context whose stream and event hooks are all no-ops.
struct context
{
void finish() const {}
void set_stream(int) {}
void create_events(int) {}
void record_event(int) {}
void wait_event(int) {}
};
migraphx::context ctx = context{};
std::string name() const { return "stream_execution"; }
std::vector<migraphx::pass> get_passes(migraphx::context&) const
{
return {migraphx::pre_scheduling{weight_func(), 2, insert_instruction{}, true}};
}
migraphx::context get_context() const { return {ctx}; }
};
// Two convolutions share one input and feed a concat. After compiling with
// the pre_scheduling-only target, the test counts the inserted set_stream
// instructions, the per-stream assignments and the record/wait masks.
TEST_CASE(test1)
{
migraphx::program p;
auto in1 =
p.add_parameter("0", migraphx::shape{migraphx::shape::float_type, {32, 256, 35, 35}});
auto l1 =
p.add_literal(migraphx::generate_literal({migraphx::shape::float_type, {64, 256, 1, 1}}));
auto p1 = p.add_instruction(migraphx::op::convolution{}, in1, l1);
auto l2 =
p.add_literal(migraphx::generate_literal({migraphx::shape::float_type, {48, 256, 1, 1}}));
auto p2 = p.add_instruction(migraphx::op::convolution{}, in1, l2);
p.add_instruction(migraphx::op::concat{1}, p1, p2);
p.compile(stream_execution_target{});
std::cout << p << std::endl;
// Expect three set_stream instructions in the compiled program.
CHECK(std::count_if(
p.begin(), p.end(), [](auto&& ins) { return ins.name() == "set_stream"; }) == 3);
// Expect two instructions on stream 0 and one on stream 1.
CHECK(std::count_if(p.begin(), p.end(), [](auto&& ins) { return ins.get_stream() == 0; }) == 2);
CHECK(std::count_if(p.begin(), p.end(), [](auto&& ins) { return ins.get_stream() == 1; }) == 1);
// Expect exactly one instruction flagged to record and one flagged to wait.
CHECK(std::count_if(p.begin(), p.end(), [](auto&& ins) {
return ins.has_mask(migraphx::record_event);
}) == 1);
CHECK(std::count_if(p.begin(), p.end(), [](auto&& ins) {
return ins.has_mask(migraphx::wait_event);
}) == 1);
}
// Entry point: hands the command-line arguments to the test runner.
int main(int argc, const char* argv[])
{
    test::run(argc, argv);
}
...@@ -20,19 +20,26 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -20,19 +20,26 @@ inline namespace MIGRAPHX_INLINE_NS {
struct context struct context
{ {
/// Wait for any tasks in the context to complete /// Wait for any tasks in the context to complete
void finish() const; void finish();
void set_stream(int ndx);
void create_events(int num_of_events);
void record_event(int event);
void wait_event(int event);
}; };
#else #else
<% <%
interface('context', interface('context',
virtual('finish', returns='void', const=True) virtual('finish', returns='void'),
virtual('set_stream', returns='void', input = 'int'),
virtual('create_events', returns='void', input = 'int'),
virtual('record_event', returns='void', input = 'int'),
virtual('wait_event', returns='void', input = 'int'),
) )
%> %>
#endif #endif
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
#ifndef MIGRAPHX_GUARD_FIND_CONCUR_HPP
#define MIGRAPHX_GUARD_FIND_CONCUR_HPP
#include <cassert>
#include <string>
#include <functional>
#include <memory>
#include <type_traits>
#include <utility>
#include <unordered_map>
#include <vector>
#include <migraphx/instruction.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct program;
#ifdef DOXYGEN
/// An interface for target-dependent analysis to find concurrent instructions
/// executing in different streams.
struct find_concur
{
    /// Analyzes program `p` for `num_of_streams` streams and fills the two
    /// output maps, both keyed by instruction: `concur_instrs` and
    /// `instr2_points`. The method is const, matching the generated
    /// interface and its implementations.
    void get_concur(program* p,
                    int num_of_streams,
                    std::unordered_map<const instruction*,
                                       std::vector<std::vector<const instruction*>>>& concur_instrs,
                    std::unordered_map<const instruction*, int>& instr2_points) const;
};
#else
<%
interface('find_concur',
virtual('get_concur', returns='void', p = 'program*', num_of_stream = 'int', concur_instrs = 'std::unordered_map<const instruction*, std::vector<std::vector<const instruction*>>>&', input = 'std::unordered_map<const instruction*, int>&', const=True)
)
%>
#endif
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_INSERT_INSTRUCTION_HPP
#define MIGRAPHX_GUARD_INSERT_INSTRUCTION_HPP
#include <cassert>
#include <string>
#include <functional>
#include <memory>
#include <type_traits>
#include <utility>
#include <migraphx/instruction_ref.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct program;
#ifdef DOXYGEN
/// An interface for target-dependent instruction insertion
/// for multi-stream execution.
struct insert_instruction
{
/// Inserts into `p`, at the position given by `ins`, an instruction that
/// creates `num_of_events` events.
void insert_create_events(program* p, instruction_ref ins, int num_of_events);
/// Inserts into `p`, at the position given by `ins`, an instruction that
/// records event number `event`.
void insert_record_event(program* p, instruction_ref ins, int event);
/// Inserts into `p`, at the position given by `ins`, an instruction that
/// waits for event number `event`.
void insert_wait_event(program* p, instruction_ref ins, int event);
/// Inserts into `p`, at the position given by `ins`, an instruction that
/// selects stream number `stream`.
void insert_stream(program* p, instruction_ref ins, int stream);
};
#else
<%
interface('insert_instruction',
virtual('insert_create_events', returns='void', p = 'program*', ins ='instruction_ref', input = 'int'),
virtual('insert_record_event', returns='void', p = 'program*', ins ='instruction_ref', input = 'int'),
virtual('insert_wait_event', returns='void', p = 'program*', ins = 'instruction_ref', input = 'int'),
virtual('insert_stream', returns='void', p = 'program*', ins ='instruction_ref', input = 'int')
)
%>
#endif
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment