Commit e40a8f31 authored by mei-ye's avatar mei-ye
Browse files

merge with master

parent ab567439
......@@ -12,7 +12,7 @@ namespace migraph {
void eliminate_allocation::apply(program& p) const
{
assert(alignment > 0);
if (!enabled(MIGRAPH_DISABLE_MEMORY_COLORING{}))
if(!enabled(MIGRAPH_DISABLE_MEMORY_COLORING{}))
return;
std::size_t n = 0;
......
......@@ -548,12 +548,12 @@ struct load
{
return {s, args[0].data() + offset};
}
};
};
struct write_literal
{
std::size_t offset = 0;
bool pre_copy = false;
bool pre_copy = false;
std::string name() const { return "write_literal"; }
shape compute_shape(std::vector<shape> inputs) const { return inputs.at(1); }
argument compute(context&, const shape&, const std::vector<argument>&) const
......@@ -576,7 +576,6 @@ struct outline
return {s, nullptr};
}
};
} // namespace migraph
#endif
......@@ -215,8 +215,6 @@ inline const ValueType& any_cast(const pass& x)
}
#endif
} // namespace migraph
#endif
......@@ -18,9 +18,7 @@ struct program_impl;
const operation& get_operation(instruction_ref ins);
/**
/**
* @brief Stores the instruction stream
*/
struct program
......@@ -104,7 +102,6 @@ struct program
private:
std::unique_ptr<program_impl> impl;
};
} // namespace migraph
#endif
......@@ -15,9 +15,9 @@
namespace migraph {
using parameter_map = std::unordered_map<std::string, argument>;
#ifdef DOXYGEN
/// An interface for a compilation target
struct target
{
......@@ -137,7 +137,7 @@ struct target
virtual std::string name() const = 0;
virtual std::vector<pass> get_passes(context& ctx) const = 0;
virtual context get_context(parameter_map params) const = 0;
virtual context get_context(parameter_map params) const = 0;
};
template <typename PrivateDetailTypeErasedT>
......@@ -176,7 +176,10 @@ struct target
return private_detail_te_value.get_passes(ctx);
}
context get_context(parameter_map params) const override { return private_detail_te_value.get_context(params); }
context get_context(parameter_map params) const override
{
return private_detail_te_value.get_context(params);
}
PrivateDetailTypeErasedT private_detail_te_value;
};
......@@ -244,7 +247,6 @@ inline const ValueType& any_cast(const target& x)
}
#endif
} // namespace migraph
#endif
......@@ -14,15 +14,13 @@
namespace migraph {
#define MIGRAPH_DEBUG_OPT
//#define MIGRAPH_DEBUG_OPT
#ifdef MIGRAPH_DEBUG_OPT
#define MIGRAPH_DEBUG(s) s
#else
#define MIGRAPH_DEBUG(s)
#endif // MIGRAPH_DEBUG_OPT
} // namespace migraph
#endif // MIGRAPH_GUARD_RTGLIB_COMMON_HEADER_HPP
......@@ -5,7 +5,8 @@ namespace migraph {
void memory_coloring::apply(program& p) const
{
if (!enabled(MIGRAPH_DISABLE_MEMORY_COLORING{})) {
if(!enabled(MIGRAPH_DISABLE_MEMORY_COLORING{}))
{
memory_coloring_impl opt(&p);
opt.run();
}
......
......@@ -43,7 +43,7 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
{
live_range* range = live_ranges[iter];
long long offset = range->offset;
if(offset != InvalidOffset)
if(offset != invalid_offset)
{
conflict_queue.push(range);
if(offset2_live.find(offset) == offset2_live.end())
......@@ -64,11 +64,13 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
long long offset = 0;
while(!conflict_queue.empty())
{
live_range* range = conflict_queue.top();
live_range* range = conflict_queue.top();
long long iter_offset = range->offset;
if (offset > iter_offset) {
if(offset > iter_offset)
{
offset = std::max(offset, iter_offset + range->size);
} else if (offset2_live[iter_offset] == range)
}
else if(offset2_live[iter_offset] == range)
{
if((iter_offset > offset) && (iter_offset - offset) >= size)
{
......@@ -94,7 +96,7 @@ void memory_coloring_impl::build()
return;
int cur_points = num_of_instrs * 2;
instruction_ref iter = std::prev(p_program->end());
instruction_ref iter = p_program->end();
instruction_ref begin = p_program->begin();
std::vector<instruction_ref> dead_instrs;
std::set<int> live_set;
......@@ -102,6 +104,7 @@ void memory_coloring_impl::build()
live_intervals.resize(num_of_instrs);
do
{
iter = std::prev(iter);
const instruction* p_iter = &(*iter);
interval_ptr def_interval = nullptr;
bool is_dead = false;
......@@ -115,9 +118,9 @@ void memory_coloring_impl::build()
def_interval->result = iter->result;
def_interval->is_literal = is_lit;
alloc_queue.push(def_interval);
range.begin = cur_points;
range.begin = cur_points;
def_interval->def_point = cur_points;
range.size = (iter->result).bytes();
range.size = (iter->result).bytes();
live_set.erase(range.vn);
}
}
......@@ -134,17 +137,17 @@ void memory_coloring_impl::build()
{
if(is_output_param(arg))
is_dead = false;
if (def_interval != nullptr) {
if(def_interval != nullptr)
{
def_interval->is_live_on_entry = true;
}
continue;
}
const instruction* p_arg = &(*arg);
if(cnt == tie_ndx)
if(cnt == tie_ndx && (def_interval != nullptr))
{
// input memory is used as this instruction's output.
// def is considered as use. Coalesce the live intervals.
assert(def_interval != nullptr);
def_interval->add_use(cur_points);
instr2_live[p_arg] = def_interval;
}
......@@ -161,7 +164,7 @@ void memory_coloring_impl::build()
add_conflicts(live_set, max_value_number);
live_set.insert(max_value_number);
live_ranges[max_value_number] = &(interval->segment);
earliest_end_point = cur_points;
earliest_end_point = cur_points;
}
else
{
......@@ -173,21 +176,21 @@ void memory_coloring_impl::build()
if(is_dead)
dead_instrs.push_back(iter);
cur_points -= 2;
iter = std::prev(iter);
} while(iter != begin);
}
void memory_coloring_impl::register_operand_alias()
{
operand_alias["hip::allocate"] = -1;
operand_alias["@outline"] = -1;
operand_alias["@outline"] = -1;
operand_alias["check_context"] = -1;
operand_alias["@literal"] = -1;
operand_alias["@param"] = -1;
operand_alias["transpose"] = 0;
operand_alias["flatten"] = 0;
operand_alias["broadcast"] = 1;
operand_alias["reshape"] = 0;
operand_alias["@literal"] = -1;
operand_alias["@param"] = -1;
operand_alias["transpose"] = 0;
operand_alias["flatten"] = 0;
operand_alias["broadcast"] = 1;
operand_alias["reshape"] = 0;
operand_alias["pass"] = 0;
}
void memory_coloring_impl::rewrite()
......@@ -204,27 +207,30 @@ void memory_coloring_impl::rewrite()
if(instr2_live.find(p_iter) != instr2_live.end())
{
interval_ptr interval = instr2_live[p_iter];
if (interval->get_begin() == InvalidOffset)
if(interval->get_begin() == invalid_offset)
continue;
std::size_t offset = 0;
if(interval->get_offset() == InvalidOffset)
if(interval->get_offset() == invalid_offset)
{
assert(interval->result.bytes() == 0);
} else {
}
else
{
offset = interval->get_offset();
}
if(is_allocate(ins))
{
p_program->replace_instruction(
ins, load{ins->arguments.at(0)->result, offset}, scratch_param);
ins, load{ins->arguments.at(0)->result, offset}, scratch_param);
}
else if(is_literal(ins))
{
auto pre = p_program->add_literal(ins->lit);
auto pre = p_program->add_literal(ins->lit);
bool pre_copy = (interval->get_begin() < earliest_end_point);
p_program->replace_instruction(ins, write_literal{offset, pre_copy}, scratch_param, pre);
p_program->replace_instruction(
ins, write_literal{offset, pre_copy}, scratch_param, pre);
}
}
}
......@@ -233,9 +239,7 @@ void memory_coloring_impl::rewrite()
}
#ifdef MIGRAPH_DEBUG_OPT
// map liveness tracking point to instruction enum.
#define GET_INS_ENUM(x) (((x) > 0) ? (((x) >> 1) - 1) : InvalidOffset)
void memory_coloring_impl::dump(const std::string& str) { std::cout << str << std::endl; }
void memory_coloring_impl::dump_program() { std::cout << *p_program << std::endl; }
......@@ -274,12 +278,14 @@ void memory_coloring_impl::verify()
live_interval& interval = live_intervals[i];
live_range& segment = interval.segment;
if (segment.begin == InvalidOffset) {
if(segment.begin == invalid_offset)
{
assert(interval.is_live_on_entry);
continue;
}
if(segment.offset == InvalidOffset) {
if(segment.offset == invalid_offset)
{
continue;
}
int vn = segment.vn;
......@@ -289,7 +295,7 @@ void memory_coloring_impl::verify()
for(auto& iter : vn_set)
{
live_range* range = live_ranges[iter];
if(range->offset == InvalidOffset)
if(range->offset == invalid_offset)
continue;
if(!is_disjoin(*range, segment))
assert(false);
......@@ -298,12 +304,23 @@ void memory_coloring_impl::verify()
}
}
}
// map liveness tracking point to instruction enum.
static int get_ins_enum(int x)
{
if(x > 0)
{
return (x / 2) - 1;
}
else
return invalid_offset;
}
void live_range::dump()
{
std::cout << " segment:" << vn;
std::cout << " [" << GET_INS_ENUM(begin) << ", " << GET_INS_ENUM(end) << "]";
if(offset != InvalidOffset)
std::cout << " [" << get_ins_enum(begin) << ", " << get_ins_enum(end) << "]";
if(offset != invalid_offset)
{
std::cout << " mem:";
std::cout << " [" << offset << "," << offset + size - 1 << "]";
......@@ -318,10 +335,10 @@ void live_interval::dump()
std::cout << " uses:";
for(auto& iter : use_points)
{
std::cout << " " << GET_INS_ENUM(iter) << ",";
std::cout << " " << get_ins_enum(iter) << ",";
}
std::cout << " def:";
std::cout << " " << GET_INS_ENUM(def_point);
std::cout << " " << get_ins_enum(def_point);
if(is_literal)
std::cout << " literal";
......
......@@ -4,7 +4,7 @@
namespace migraph {
static const int InvalidOffset = -1;
static const int invalid_offset = -1;
struct live_range
{
......@@ -20,11 +20,11 @@ struct live_range
struct live_interval
{
live_interval() : segment({InvalidOffset, InvalidOffset, InvalidOffset, InvalidOffset, 0})
live_interval() : segment({invalid_offset, invalid_offset, invalid_offset, invalid_offset, 0})
{
id = InvalidOffset;
def_point = InvalidOffset;
is_literal = false;
id = invalid_offset;
def_point = invalid_offset;
is_literal = false;
is_live_on_entry = false;
}
......@@ -46,7 +46,7 @@ struct live_interval
bool is_live_on_entry;
};
typedef live_interval* interval_ptr;
using interval_ptr = live_interval*;
struct memory_coloring_impl
{
......@@ -93,7 +93,7 @@ struct memory_coloring_impl
int get_input_tie_ndx(const instruction_ref ins)
{
std::string name = ins->op.name();
if (operand_alias.find(name) != operand_alias.end())
if(operand_alias.find(name) != operand_alias.end())
return operand_alias[name];
int cnt = -1;
......@@ -104,16 +104,14 @@ struct memory_coloring_impl
if(is_allocate(arg) || is_output_param(arg))
last_allocate = cnt;
}
if (last_allocate != -1)
operand_alias[name] = last_allocate;
else
assert("unknown operand alias");
assert((last_allocate != -1));
operand_alias[name] = last_allocate;
return last_allocate;
}
#ifdef MIGRAPH_DEBUG_OPT
static bool is_disjoin(live_range& range1, live_range& range2)
{
if ((range1.size == 0) || (range2.size == 0))
if((range1.size == 0) || (range2.size == 0))
return false;
long long end1 = range1.offset + range1.size - 1;
long long end2 = range2.offset + range2.size - 1;
......
......@@ -219,7 +219,7 @@ instruction_ref program::get_parameter(std::string name) const
if(ins != this->end())
return ins;
else
return this->end();
return this->end();
}
std::unordered_map<std::string, shape> program::get_parameter_shapes() const
......@@ -260,7 +260,7 @@ instruction_ref program::validate() const
void program::compile(const target& t, tracer trace, parameter_map params)
{
assert(this->validate() == impl->instructions.end());
this->impl->ctx = t.get_context(params);
this->impl->ctx = t.get_context(std::move(params));
if(not trace.enabled() and enabled(MIGRAPH_TRACE_COMPILE{}))
trace = tracer{std::cout};
trace(*this);
......@@ -444,5 +444,4 @@ std::ostream& operator<<(std::ostream& os, const program& p)
print_program(os, p, [](auto&&...) {});
return os;
}
} // namespace migraph
......@@ -7,13 +7,12 @@
namespace migraph {
namespace cpu {
using parameter_map = std::unordered_map<std::string, argument>;
struct context
{
parameter_map params;
void finish() const {}
};
} // namespace cpu
} // namespace migraph
......
......@@ -11,11 +11,12 @@ struct cpu_target
{
std::string name() const;
std::vector<pass> get_passes(migraph::context& ctx) const;
migraph::context get_context(parameter_map params = parameter_map()) const { return context{params}; }
migraph::context get_context(parameter_map params = parameter_map()) const
{
return context{std::move(params)};
}
};
} // namespace cpu
} // namespace migraph
#endif
......@@ -13,9 +13,9 @@ namespace gpu {
void eliminate_workspace::apply(program& p) const
{
if (!enabled(MIGRAPH_DISABLE_MEMORY_COLORING{}))
if(!enabled(MIGRAPH_DISABLE_MEMORY_COLORING{}))
return;
std::size_t n = 0;
std::vector<instruction_ref> allocs;
for(auto ins : iterator_for(p))
......
......@@ -93,8 +93,6 @@ void gpu_sync() { hipDeviceSynchronize(); }
void copy_to_gpu(char* dst, const char* src, std::size_t size)
{
hipMemcpy(dst, src, size, hipMemcpyHostToDevice);
}
}
} // namespace gpu
} // namespace migraph
......@@ -19,9 +19,7 @@ struct context
std::vector<argument> literals{};
void finish() const { gpu_sync(); }
};
} // namespace gpu
} // namespace migraph
#endif
......@@ -15,7 +15,7 @@ migraph::argument from_gpu(migraph::argument arg);
void gpu_sync();
void copy_to_gpu(char* dst, const char* src, std::size_t size);
void copy_to_gpu(char* dst, const char* src, std::size_t size);
struct hip_allocate
{
......@@ -73,17 +73,15 @@ struct hip_memcpy
shape compute_shape(std::vector<shape> inputs) const { return inputs.at(1); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
char* dst = args.at(0).data() + offset;
const char* src = args.at(1).data();
std::size_t size = args.at(1).get_shape().bytes();
char* dst = args.at(0).data() + offset;
const char* src = args.at(1).data();
std::size_t size = args.at(1).get_shape().bytes();
copy_to_gpu(dst, src, size);
return {output_shape, dst};
return {std::move(output_shape), dst};
}
std::size_t offset = 0;
};
} // namespace gpu
} // namespace migraph
#endif
......@@ -15,9 +15,7 @@ struct lowering_memory_coloring
void apply(program& p) const;
};
} // namespace gpu
} // namespace migraph
#endif
......@@ -12,9 +12,7 @@ struct target
std::vector<pass> get_passes(migraph::context& gctx) const;
migraph::context get_context(parameter_map params = parameter_map()) const;
};
} // namespace gpu
} // namespace migraph
#endif
......@@ -108,8 +108,11 @@ struct miopen_convolution
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]->get_shape()));
gpu_sync();
auto w = to_gpu(generate_argument(inputs[1]->get_shape()));
gpu_sync();
auto y = to_gpu(generate_argument(output_shape));
gpu_sync();
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
......
......@@ -12,10 +12,7 @@ struct gen_base_addr
{
shape s;
std::string name() const { return "gen_base_addr"; }
shape compute_shape(const std::vector<shape>&) const
{
return s;
}
shape compute_shape(const std::vector<shape>&) const { return s; }
argument compute(const context& ctx, const shape&, const std::vector<argument>&) const
{
return ctx.scratch;
......@@ -24,58 +21,65 @@ struct gen_base_addr
void lowering_memory_coloring::apply(program& p) const
{
if (enabled(MIGRAPH_DISABLE_MEMORY_COLORING{}))
if(enabled(MIGRAPH_DISABLE_MEMORY_COLORING{}))
return;
assert(ctx != nullptr);
auto scratch_ins = p.get_parameter("scratch");
if (scratch_ins == p.end())
if(scratch_ins == p.end())
return;
bool can_resolve_addr = false;
argument base_ptr;
shape s_scratch = scratch_ins->result;
if (ctx->params.find("scratch") == ctx->params.end()) {
if(ctx->params.find("scratch") == ctx->params.end())
{
// scratch memory is not passed in, allocate memory here.
can_resolve_addr = true;
base_ptr = allocate_gpu(s_scratch, false);
} else {
base_ptr = allocate_gpu(s_scratch, false);
}
else
{
argument a = ctx->params["scratch"];
assert((a.get_shape().bytes() >= s_scratch.bytes()) && "insufficent scratch memory");
if (!a.empty()) {
if(!a.empty())
{
// scratch memory is passed in and already has a known address.
can_resolve_addr = true;
base_ptr = a;
base_ptr = a;
}
}
if (can_resolve_addr) {
if(can_resolve_addr)
{
ctx->scratch = base_ptr;
scratch_ins = p.replace_instruction(scratch_ins, gen_base_addr{s_scratch});
scratch_ins = p.replace_instruction(scratch_ins, gen_base_addr{s_scratch});
}
for(auto ins : iterator_for(p))
{
{
if(ins->op.name() == "write_literal")
{
std::vector<instruction_ref>& args = ins->arguments;
instruction_ref arg0 = args.at(0);
instruction_ref arg1 = args.at(1);
shape s_arg1 = arg1->get_shape();
std::size_t size = s_arg1.bytes();
auto&& a = any_cast<write_literal>(ins->op);
instruction_ref arg0 = args.at(0);
instruction_ref arg1 = args.at(1);
shape s_arg1 = arg1->get_shape();
std::size_t size = s_arg1.bytes();
auto&& a = any_cast<write_literal>(ins->op);
std::size_t offset = a.offset;
if (can_resolve_addr && a.pre_copy) {
char* dst = base_ptr.data() + offset;
if(can_resolve_addr && a.pre_copy)
{
char* dst = base_ptr.data() + offset;
const char* src = arg1->lit.data();
copy_to_gpu(dst, src, size);
gpu_sync();
p.replace_instruction(ins, load{s_arg1, offset}, scratch_ins);
p.remove_instruction(arg1);
} else {
}
else
{
p.replace_instruction(ins, hip_memcpy{offset}, arg0, arg1);
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment