Commit bf6f82d8 authored by Paul

Merge from develop

parents 6a0797e2 b93f5320
@@ -63,11 +63,11 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
         }
     }
-    long long offset = 0;
+    std::size_t offset = 0;
     while(!conflict_queue.empty())
     {
        live_range* range = conflict_queue.top();
-        long long iter_offset = range->offset;
+        std::size_t iter_offset = range->offset;
        if(offset > iter_offset)
        {
            offset = std::max(offset, iter_offset + range->size);
@@ -97,7 +97,7 @@ void memory_coloring_impl::build()
     if(num_of_instrs == 0)
         return;
-    int cur_points = num_of_instrs * 2;
+    auto cur_points = num_of_instrs * 2;
     instruction_ref iter  = p_program->end();
     instruction_ref begin = p_program->begin();
     std::vector<instruction_ref> dead_instrs;
@@ -193,13 +193,13 @@ void memory_coloring_impl::rewrite()
             continue;
         std::size_t offset = 0;
-        if(interval->get_offset() == invalid_offset)
+        if(interval->get_offset() != invalid_offset)
         {
-            assert(interval->result.bytes() == 0);
+            offset = interval->get_offset();
         }
         else
         {
-            offset = interval->get_offset();
+            assert(interval->result.bytes() == 0);
         }
         if(is_allocate(ins))
@@ -207,15 +207,6 @@ void memory_coloring_impl::rewrite()
                 p_program->replace_instruction(
                     ins, op::load{ins->get_shape(), offset}, scratch_param);
             }
-            else if(is_literal(ins))
-            {
-#if 0
-                auto pre      = p_program->add_literal(ins->lit);
-                bool pre_copy = (interval->get_begin() < earliest_end_point);
-                p_program->replace_instruction(
-                    ins, write_literal{offset, pre_copy}, scratch_param, pre);
-#endif
-            }
         }
     }
     MIGRAPHX_DEBUG(dump("---After rewrite---"));
......
@@ -21,15 +21,15 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-static const int invalid_offset = -1;
+static const std::size_t invalid_offset = std::numeric_limits<std::size_t>::max();
 struct live_range
 {
-    int begin;        // begin point in the instruction stream.
-    int end;          // end point in the instruction stream.
-    long long offset; // offset to base pointer of allocated memory trunk.
-    int vn;           // value number that identifies this live_range.
-    long long size;   // size of required memory in bytes
+    std::size_t begin;  // begin point in the instruction stream.
+    std::size_t end;    // end point in the instruction stream.
+    std::size_t offset; // offset to base pointer of allocated memory trunk.
+    std::size_t vn;     // value number that identifies this live_range.
+    std::size_t size;   // size of required memory in bytes
 #ifdef MIGRAPHX_DEBUG_OPT
     void dump();
 #endif
@@ -45,9 +45,9 @@ struct live_interval
         is_live_on_entry = false;
     }
-    void add_use(int use) { use_points.push_front(use); }
-    int get_begin() const { return segment.begin; }
-    int get_end() const { return segment.end; }
+    void add_use(std::size_t use) { use_points.push_front(use); }
+    std::size_t get_begin() const { return segment.begin; }
+    std::size_t get_end() const { return segment.end; }
     long long get_offset() const { return segment.offset; }
 #ifdef MIGRAPHX_DEBUG_OPT
@@ -55,9 +55,9 @@ struct live_interval
 #endif
     live_range segment;
-    int id;
-    std::list<int> use_points;
-    int def_point;
+    std::size_t id;
+    std::list<std::size_t> use_points;
+    std::size_t def_point;
     shape result;
     bool is_literal;
     bool is_live_on_entry;
@@ -111,8 +111,8 @@ struct memory_coloring_impl
     {
         if((range1.size == 0) || (range2.size == 0))
             return false;
-        long long end1 = range1.offset + range1.size - 1;
-        long long end2 = range2.offset + range2.size - 1;
+        auto end1 = range1.offset + range1.size - 1;
+        auto end2 = range2.offset + range2.size - 1;
         return ((end1 < range2.offset) || (end2 < range1.offset));
     }
     void verify();
@@ -125,8 +125,8 @@ struct memory_coloring_impl
     {
         bool operator()(const interval_ptr i1, const interval_ptr i2) const
         {
-            int len1 = i1->get_end() - i1->get_begin();
-            int len2 = i2->get_end() - i2->get_begin();
+            auto len1 = i1->get_end() - i1->get_begin();
+            auto len2 = i2->get_end() - i2->get_begin();
             if(len1 != len2)
             {
                 return (len1 < len2);
@@ -158,7 +158,7 @@ struct memory_coloring_impl
     int num_of_lives;
     int max_value_number;
-    long long required_bytes;
+    std::size_t required_bytes;
     // The earliest program point where a live interval ends.
     int earliest_end_point;
     // The latest program point where a live interval ends.
......
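
A note on the sentinel change above: with offsets now unsigned, the old `-1` marker becomes `std::numeric_limits<std::size_t>::max()`, and it has to be tested with equality rather than a sign check. A minimal stand-alone sketch of the pattern (illustration only, not code from the patch):

#include <cstddef>
#include <iostream>
#include <limits>

// Unsigned sentinel replacing the old "int invalid_offset = -1".
static const std::size_t invalid_offset = std::numeric_limits<std::size_t>::max();

int main()
{
    std::size_t offset = invalid_offset; // "no offset assigned yet"
    // A sign test like (offset < 0) can never fire on an unsigned type,
    // so the rewritten code compares against the sentinel directly.
    if(offset != invalid_offset)
        std::cout << "offset = " << offset << '\n';
    else
        std::cout << "offset not assigned\n";
    return 0;
}
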
@@ -63,11 +63,16 @@ static void print_program(const program& p, F print_func)
     for(auto ins : iterator_for(p))
     {
-        std::string var_name = "@" + std::to_string(count);
+        std::string var_name;
         if(ins->name() == "@param")
         {
             var_name = any_cast<builtin::param>(ins->get_operator()).parameter;
         }
+        else
+        {
+            var_name = "@" + std::to_string(count);
+            count++;
+        }
         names.emplace(ins, var_name);
         // TODO: Use all_of
@@ -78,16 +83,76 @@ static void print_program(const program& p, F print_func)
         }
         print_func(ins, names);
-        count++;
     }
 }

 program::program() : impl(std::make_unique<program_impl>()) {}
 program::program(program&&) noexcept = default;
-program& program::operator=(program&&) noexcept = default;
 program::~program() noexcept = default;
+
+// copy constructor
+program::program(const program& p) { assign(p); }
+
+// copy assignment operator
+program& program::operator=(program p)
+{
+    std::swap(p.impl, this->impl);
+    return *this;
+}
+
+void program::assign(const program& p)
+{
+    // clean the current program
+    if(!impl)
+    {
+        impl = std::make_unique<program_impl>();
+    }
+    else if(!impl->instructions.empty())
+    {
+        impl->instructions.clear();
+    }
+    impl->ctx = p.impl->ctx;
+
+    std::unordered_map<instruction_ref, instruction_ref> ins_map;
+    for(auto ins : iterator_for(p))
+    {
+        instruction_ref copy_ins{};
+        if(ins->name() == "@literal")
+        {
+            auto l   = ins->get_literal();
+            copy_ins = impl->instructions.insert(impl->instructions.end(), instruction{l});
+        }
+        else if(ins->name() == "@param")
+        {
+            auto&& name = any_cast<builtin::param>(ins->get_operator()).parameter;
+            auto s      = ins->get_shape();
+            copy_ins    = impl->instructions.insert(impl->instructions.end(),
+                                                    {builtin::param{name}, std::move(s), {}});
+        }
+        else if(ins->name() == "@outline")
+        {
+            auto s = ins->get_shape();
+            copy_ins =
+                impl->instructions.insert(impl->instructions.end(), {builtin::outline{s}, s, {}});
+        }
+        else
+        {
+            // retrieve its mapped input
+            auto inputs = ins->inputs();
+            // ensure all inputs have their corresponding copy instructions
+            assert(std::all_of(
+                inputs.begin(), inputs.end(), [&](auto i) { return ins_map.count(i) > 0; }));
+            std::vector<instruction_ref> copy_inputs(inputs.size());
+            std::transform(inputs.begin(), inputs.end(), copy_inputs.begin(), [&](auto i) {
+                return ins_map[i];
+            });
+            copy_ins = add_instruction(ins->get_operator(), copy_inputs);
+        }
+        ins_map[ins] = copy_ins;
+    }
+}
+
 instruction_ref program::add_instruction(const operation& op, std::vector<instruction_ref> args)
 {
......
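
The by-value `program::operator=` introduced above is the copy-and-swap idiom: the parameter is copy-constructed (or move-constructed) on the way in, its guts are swapped into `*this`, and the old state is destroyed with the parameter. A self-contained sketch of the same pattern under hypothetical names (`widget` is a stand-in, not a MIGraphX type):

#include <memory>
#include <utility>

struct widget
{
    struct impl_t { int value = 0; };
    std::unique_ptr<impl_t> impl = std::make_unique<impl_t>();

    widget() = default;
    widget(const widget& w) : impl(std::make_unique<impl_t>(*w.impl)) {}
    widget(widget&&) noexcept = default;

    // Taking the argument by value lets one operator= serve both copy
    // (argument is copy-constructed) and move (argument is move-constructed);
    // swapping gives the strong exception guarantee for free.
    widget& operator=(widget w)
    {
        std::swap(w.impl, this->impl);
        return *this;
    }
};
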
@@ -22,22 +22,32 @@ bool skip_propogate(instruction_ref ins)
 void propagate_constant::apply(program& p) const
 {
-    fix([&](auto self, auto ins) {
-        if(not skip_propogate(ins))
-        {
-            auto r = ins->eval();
-            if(not r.empty())
-            {
-                assert(r.get_shape() == ins->get_shape());
-                auto l = p.add_literal(r.get_shape(), r.data());
-                p.replace_instruction(ins, l);
-                return;
-            }
-        }
-        std::unordered_set<instruction_ref> children(ins->inputs().begin(), ins->inputs().end());
-        for(auto child : children)
-            self(child);
-    })(std::prev(p.end()));
+    for(auto i : iterator_for(p))
+    {
+        if(i->name() != "@literal")
+            continue;
+        if(i->outputs().empty())
+            continue;
+        fix([&](auto self, auto ins) {
+            std::unordered_set<instruction_ref> children(ins->outputs().begin(),
+                                                         ins->outputs().end());
+            for(auto child : children)
+            {
+                if(skip_propogate(child))
+                {
+                    self(child);
+                    continue;
+                }
+                auto r = child->eval();
+                if(not r.empty())
+                {
+                    assert(r.get_shape() == child->get_shape());
+                    auto l = p.add_literal(r.get_shape(), r.data());
+                    self(p.replace_instruction(child, l));
+                }
+            }
+        })(i);
+    }
 }
 } // namespace MIGRAPHX_INLINE_NS
......
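
The rewritten pass now walks from each literal to its consumers and folds whatever evaluates to a constant, recursing through `fix`. Assuming MIGraphX's `fix` is the usual fixed-point combinator (a helper that passes the lambda back to itself so a generic lambda can recurse), a sketch of how such a combinator works:

#include <iostream>
#include <utility>

// Fix-point combinator: operator() forwards *this as the `self` argument,
// which is what lets the generic lambda above call itself on child nodes.
template <class F>
struct fix_f
{
    F f;
    template <class... Ts>
    auto operator()(Ts&&... xs) const
    {
        return f(*this, std::forward<Ts>(xs)...);
    }
};

template <class F>
fix_f<F> fix(F f)
{
    return {std::move(f)};
}

int main()
{
    auto factorial = fix([](auto self, int n) -> int { return n <= 1 ? 1 : n * self(n - 1); });
    std::cout << factorial(5) << '\n'; // prints 120
}
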
@@ -48,6 +48,12 @@ struct cpu_batch_norm_inference
 {
     op::batch_norm_inference op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "cpu::batch_norm_inference"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
@@ -107,6 +113,12 @@ struct cpu_lrn
 {
     op::lrn op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "cpu::lrn"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, shape output_shape, std::vector<argument> args) const
@@ -117,7 +129,7 @@ struct cpu_lrn
         int channels = output_shape.lens()[1];
         int height   = output_shape.lens()[2];
         int width    = output_shape.lens()[3];
-        float alphaoverarea = op.alpha / op.size;
+        float alphaoverarea = op.alpha / float(op.size);
         int radius          = (op.size - 1) / 2;
         par_dfor(n_batch, height, width)([&](int b, int h, int w) {
@@ -144,6 +156,12 @@ struct cpu_convolution
 {
     op::convolution op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "cpu::convolution"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, shape output_shape, std::vector<argument> args) const
@@ -165,15 +183,15 @@ struct cpu_convolution
             output_shape.lens()[2],
             output_shape.lens()[3])(
             [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
-                const int start_x  = i * op.stride[0] - op.padding[0];
-                const int start_y  = j * op.stride[1] - op.padding[1];
-                const int group_id = w / (wei_n / op.group);
+                const auto start_x  = i * op.stride[0] - op.padding[0];
+                const auto start_y  = j * op.stride[1] - op.padding[1];
+                const auto group_id = w / (wei_n / op.group);
                 double acc = 0;
                 dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
-                    const int in_x  = start_x + x;
-                    const int in_y  = start_y + y;
-                    const int in_ch = group_id * wei_c + k;
+                    const auto in_x  = start_x + x;
+                    const auto in_y  = start_y + y;
+                    const auto in_ch = group_id * wei_c + k;
                     if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                     {
                         acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
@@ -190,6 +208,12 @@ struct cpu_im2col
 {
     op::im2col op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     static std::string name() { return "cpu::im2col"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
@@ -209,10 +233,8 @@ struct cpu_im2col
         const std::size_t& stride_h = op.stride[0];
         const std::size_t& stride_w = op.stride[1];
-        int kdiv2_h;
-        int kdiv2_w;
-        kdiv2_h = kernel_h / 2;
-        kdiv2_w = kernel_w / 2;
+        auto kdiv2_h = kernel_h / 2;
+        auto kdiv2_w = kernel_w / 2;
         // calculate output sizes
         const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
         const std::size_t col_width  = (width - kernel_w + 2 * pad_w) / stride_w + 1;
@@ -230,8 +252,8 @@ struct cpu_im2col
                 dfor(channels,
                      kernel_h,
                      kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
-                    int idx = iinput + koffset - kdiv2_h;
-                    int jdx = jinput + loffset - kdiv2_w;
+                    auto idx = iinput + koffset - kdiv2_h;
+                    auto jdx = jinput + loffset - kdiv2_w;
                     col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                       ? input(0, c, idx, jdx)
                                       : 0;
@@ -273,6 +295,12 @@ struct cpu_pooling
 {
     op::pooling op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "cpu::pooling_" + Op::name(); }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
@@ -317,20 +345,35 @@
     }
 };

-struct cpu_contiguous
+struct cpu_op
 {
-    op::contiguous op;
-    std::string name() const { return "cpu::contiguous"; }
+    operation op;
+    std::string name() const { return "cpu::" + op.name(); }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
     {
-        return op.compute(output_shape, std::move(args));
+        return op.compute(output_shape, args);
     }
+    friend bool operator==(const cpu_op& x, const cpu_op& y) { return x.op == y.op; }
+    friend bool operator==(const cpu_op& x, const operation& y)
+    {
+        if(x.name() != y.name())
+            return false;
+        return x == any_cast<cpu_op>(y);
+    }
+    friend bool operator==(const operation& x, const cpu_op& y) { return y == x; }
 };

 struct cpu_pad
 {
     op::pad op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "cpu::contiguous"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
@@ -354,20 +397,15 @@ struct cpu_pad
     }
 };

-struct cpu_concat
-{
-    op::concat op;
-    std::string name() const { return "cpu::concat"; }
-    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        return op.compute(output_shape, std::move(args));
-    }
-};
-
 struct cpu_gemm
 {
     op::dot op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "cpu::dot"; }
     shape compute_shape(const std::vector<shape>& inputs) const
     {
@@ -410,162 +448,6 @@ struct cpu_gemm
     }
 };

-struct cpu_gather
-{
-    op::gather op;
-    std::string name() const { return "cpu::gather"; }
-    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        return op.compute(output_shape, std::move(args));
-    }
-};
-
-struct identity_op
-{
-    std::string name() const { return "cpu::identity"; }
-    auto fcn() const
-    {
-        return [](auto x) { return x; };
-    }
-};
-
-struct abs_op
-{
-    std::string name() const { return "cpu::abs"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::abs(make_signed(x)); };
-    }
-};
-
-struct exp_op
-{
-    std::string name() const { return "cpu::exp"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::exp(x); };
-    }
-};
-
-struct log_op
-{
-    std::string name() const { return "cpu::log"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::log(x); };
-    }
-};
-
-struct sin_op
-{
-    std::string name() const { return "cpu::sin"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::sin(x); };
-    }
-};
-
-struct cos_op
-{
-    std::string name() const { return "cpu::cos"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::cos(x); };
-    }
-};
-
-struct tan_op
-{
-    std::string name() const { return "cpu::tan"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::tan(x); };
-    }
-};
-
-struct asin_op
-{
-    std::string name() const { return "cpu::asin"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::asin(x); };
-    }
-};
-
-struct acos_op
-{
-    std::string name() const { return "cpu::acos"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::acos(x); };
-    }
-};
-
-struct atan_op
-{
-    std::string name() const { return "cpu::atan"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::atan(x); };
-    }
-};
-
-struct sinh_op
-{
-    std::string name() const { return "cpu::sinh"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::sinh(x); };
-    }
-};
-
-struct cosh_op
-{
-    std::string name() const { return "cpu::cosh"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::cosh(x); };
-    }
-};
-
-struct tanh_op
-{
-    std::string name() const { return "cpu::tanh"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::tanh(x); };
-    }
-};
-
-struct sigmoid_op
-{
-    std::string name() const { return "cpu::sigmoid"; }
-    auto fcn() const
-    {
-        return [](auto x) { return 1.f / (1.f + std::exp(-x)); };
-    }
-};
-
-struct neg_op
-{
-    std::string name() const { return "cpu::neg"; }
-    auto fcn() const
-    {
-        return [](auto x) { return -x; };
-    }
-};
-
-struct relu_op
-{
-    std::string name() const { return "cpu::relu"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::max(decltype(x){0}, x); };
-    }
-};
-
 struct leaky_relu_op
 {
     op::leaky_relu op;
@@ -592,14 +474,42 @@ template <typename Op>
 struct cpu_unary
 {
     Op op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op.op, f);
+    }
+
     std::string name() const { return op.name(); }
-    shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        check_shapes{inputs}.has(1);
+        auto s = inputs.at(0);
+        if(s.packed())
+        {
+            return s;
+        }
+        else
+        {
+            return {s.type(), s.lens()};
+        }
+    }
+
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
         result.visit([&](auto output) {
             args[0].visit([&](auto input) {
-                std::transform(input.begin(), input.end(), output.begin(), op.fcn());
+                if(input.get_shape().standard())
+                {
+                    std::transform(input.begin(), input.end(), output.begin(), op.fcn());
+                }
+                else
+                {
+                    shape_for_each(output.get_shape(), [&](const auto& idx) {
+                        output(idx.begin(), idx.end()) = op.fcn()(input(idx.begin(), idx.end()));
+                    });
+                }
             });
         });
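
The standard/non-standard split in `compute` above is the common shape-aware dispatch for element-wise kernels: contiguous data takes a flat `std::transform`, anything strided is visited index-by-index. A simplified stand-alone sketch of the same dispatch (`strided_view` and `apply_unary` are illustrative, not MIGraphX types):

#include <algorithm>
#include <cstddef>

// A 1-D view where stride == 1 means a dense ("standard") layout.
struct strided_view
{
    float* data;
    std::size_t n;
    std::size_t stride;
    float& at(std::size_t i) const { return data[i * stride]; }
};

template <class F>
void apply_unary(const strided_view& in, const strided_view& out, F f)
{
    if(in.stride == 1 && out.stride == 1)
        std::transform(in.data, in.data + in.n, out.data, f); // fast contiguous path
    else
        for(std::size_t i = 0; i < in.n; i++) // general path: honor strides
            out.at(i) = f(in.at(i));
}
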
@@ -622,20 +532,20 @@ struct softmax2d
         auto nw = input.get_shape().lens()[3];
         dfor(nb, nh, nw)([&](std::size_t b, std::size_t i, std::size_t j) {
             value_type cmax = std::numeric_limits<value_type>::lowest();
-            for(int c = 0; c < nc; c++)
+            for(std::size_t c = 0; c < nc; c++)
             {
                 cmax = std::max(cmax, input(b, c, i, j));
             }
-            for(int c = 0; c < nc; c++)
+            for(std::size_t c = 0; c < nc; c++)
             {
                 output(b, c, i, j) = std::exp(input(b, c, i, j) - cmax);
             }
             value_type sum = value_type(0);
-            for(int c = 0; c < nc; c++)
+            for(std::size_t c = 0; c < nc; c++)
             {
                 sum += output(b, c, i, j);
             }
-            for(int c = 0; c < nc; c++)
+            for(std::size_t c = 0; c < nc; c++)
             {
                 output(b, c, i, j) = output(b, c, i, j) / sum;
             }
@@ -648,6 +558,13 @@ struct cpu_logsoftmax
 {
     op::logsoftmax op;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "cpu::logsoftmax"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
@@ -714,87 +631,6 @@ struct cpu_logsoftmax
     }
 };

-struct add_op
-{
-    std::string name() const { return "add"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x + y; };
-    }
-};
-
-struct sub_op
-{
-    std::string name() const { return "sub"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x - y; };
-    }
-};
-
-struct mul_op
-{
-    std::string name() const { return "mul"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x * y; };
-    }
-};
-
-struct div_op
-{
-    std::string name() const { return "div"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x / y; };
-    }
-};
-
-struct max_op
-{
-    std::string name() const { return "max"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return std::max(x, y); };
-    }
-};
-
-struct min_op
-{
-    std::string name() const { return "min"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return std::min(x, y); };
-    }
-};
-
-template <typename Op>
-struct cpu_binary
-{
-    Op op;
-    std::string name() const { return op.name(); }
-    shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        argument result{output_shape};
-        visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
-            if(input1.get_shape().packed() and input2.get_shape().packed())
-            {
-                std::transform(
-                    input1.begin(), input1.end(), input2.begin(), output.begin(), op.fcn());
-            }
-            else
-            {
-                shape_for_each(output.get_shape(), [&](const auto& idx) {
-                    output(idx.begin(), idx.end()) =
-                        op.fcn()(input1(idx.begin(), idx.end()), input2(idx.begin(), idx.end()));
-                });
-            }
-        });
-        return result;
-    }
-};
-
 struct cpu_apply
 {
     program* prog;
@@ -814,43 +650,17 @@ struct cpu_apply
     void init()
     {
-        apply_map["im2col"]      = extend_op<cpu_im2col, op::im2col>();
-        apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
-        apply_map["dot"]         = extend_op<cpu_gemm, op::dot>();
         apply_map["batch_norm_inference"] =
             extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
-        apply_map["lrn"]        = extend_op<cpu_lrn, op::lrn>();
-        apply_map["contiguous"] = extend_op<cpu_contiguous, op::contiguous>();
-        apply_map["pad"]        = extend_op<cpu_pad, op::pad>();
-        apply_map["concat"]     = extend_op<cpu_concat, op::concat>();
-        apply_map["gather"]     = extend_op<cpu_gather, op::gather>();
-        apply_map["logsoftmax"] = extend_op<cpu_logsoftmax, op::logsoftmax>();
-        apply_map["leaky_relu"] = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
-        apply_map["elu"]        = extend_op<cpu_unary<elu_op>, op::elu>();
-        apply_map["identity"]   = simple_op<cpu_unary<identity_op>>();
-        apply_map["abs"]        = simple_op<cpu_unary<abs_op>>();
-        apply_map["sinh"]       = simple_op<cpu_unary<sinh_op>>();
-        apply_map["cosh"]       = simple_op<cpu_unary<cosh_op>>();
-        apply_map["tanh"]       = simple_op<cpu_unary<tanh_op>>();
-        apply_map["sigmoid"]    = simple_op<cpu_unary<sigmoid_op>>();
-        apply_map["exp"]        = simple_op<cpu_unary<exp_op>>();
-        apply_map["log"]        = simple_op<cpu_unary<log_op>>();
-        apply_map["neg"]        = simple_op<cpu_unary<neg_op>>();
-        apply_map["sin"]        = simple_op<cpu_unary<sin_op>>();
-        apply_map["cos"]        = simple_op<cpu_unary<cos_op>>();
-        apply_map["tan"]        = simple_op<cpu_unary<tan_op>>();
-        apply_map["asin"]       = simple_op<cpu_unary<asin_op>>();
-        apply_map["acos"]       = simple_op<cpu_unary<acos_op>>();
-        apply_map["atan"]       = simple_op<cpu_unary<atan_op>>();
-        apply_map["relu"]       = simple_op<cpu_unary<relu_op>>();
-        apply_map["add"]        = simple_op<cpu_binary<add_op>>();
-        apply_map["sub"]        = simple_op<cpu_binary<sub_op>>();
-        apply_map["mul"]        = simple_op<cpu_binary<mul_op>>();
-        apply_map["div"]        = simple_op<cpu_binary<div_op>>();
-        apply_map["max"]        = simple_op<cpu_binary<max_op>>();
-        apply_map["min"]        = simple_op<cpu_binary<min_op>>();
-        apply_map["softmax"]    = simple_op<softmax2d>();
+        apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
+        apply_map["dot"]         = extend_op<cpu_gemm, op::dot>();
+        apply_map["elu"]         = extend_op<cpu_unary<elu_op>, op::elu>();
+        apply_map["im2col"]      = extend_op<cpu_im2col, op::im2col>();
+        apply_map["leaky_relu"]  = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
+        apply_map["logsoftmax"]  = extend_op<cpu_logsoftmax, op::logsoftmax>();
+        apply_map["lrn"]         = extend_op<cpu_lrn, op::lrn>();
+        apply_map["pad"]         = extend_op<cpu_pad, op::pad>();
+        apply_map["softmax"]     = simple_op<softmax2d>();
     }

     void apply()
@@ -866,9 +676,18 @@ struct cpu_apply
             {
                 apply_map.at(it->name())(it);
             }
+            else if(is_context_free(it->get_operator()))
+            {
+                apply_cpu_op(it);
+            }
         }
     }
+
+    void apply_cpu_op(instruction_ref ins)
+    {
+        prog->replace_instruction(ins, cpu_op{ins->get_operator()}, ins->inputs());
+    }

     template <class T>
     void apply_simple_op(instruction_ref ins)
     {
......
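
The net effect of `cpu_op` plus the `is_context_free` fallback is a two-level dispatch: named lowerings in `apply_map` win, and any remaining context-free operation is wrapped generically instead of needing a bespoke struct. A sketch of that dispatch shape with stand-in types (not the real MIGraphX interfaces):

#include <functional>
#include <string>
#include <unordered_map>

struct fake_op // illustrative stand-in for an instruction's operation
{
    std::string name;
    bool context_free;
};

void lower(fake_op& o,
           const std::unordered_map<std::string, std::function<void(fake_op&)>>& apply_map,
           const std::function<void(fake_op&)>& generic_fallback)
{
    auto it = apply_map.find(o.name);
    if(it != apply_map.end())
        it->second(o);       // specialized lowering (e.g. cpu_gemm for "dot")
    else if(o.context_free)
        generic_fallback(o); // wrap in a cpu_op-style adapter and replace in place
}
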
@@ -32,6 +32,7 @@ add_library(migraphx_device
     device/pad.cpp
     device/gather.cpp
     device/sub.cpp
+    device/clip.cpp
 )
 set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
 rocm_clang_tidy_check(migraphx_device)
@@ -66,6 +67,7 @@ add_library(migraphx_gpu
     lrn.cpp
     schedule_model.cpp
     adjust_allocation.cpp
+    clip.cpp
 )
 set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
 rocm_clang_tidy_check(migraphx_gpu)
......
@@ -7,7 +7,7 @@ namespace gpu {
 shape miopen_abs::compute_shape(const std::vector<shape>& inputs) const
 {
-    check_shapes{inputs, *this}.has(2).not_broadcasted();
+    check_shapes{inputs, *this}.has(2).packed();
     return inputs.at(0);
 }
......
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/clip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_clip::compute_shape(std::vector<shape> inputs) const
{
inputs.pop_back();
return op.compute_shape(inputs);
}
argument hip_clip::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::clip(ctx.get_stream().get(), args.back(), args.front(), op.max_val, op.min_val);
return args.back();
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/clip.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void clip(hipStream_t stream,
const argument& result,
const argument& arg1,
const float max,
const float min)
{
nary(stream, result, arg1)(
[max, min](auto x) { return std::min<decltype(x)>(std::max<decltype(x)>(min, x), max); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
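
For reference, the device lambda above is a plain clamp; the host-side scalar equivalent below makes the `(max, min)` parameter order of `device::clip` explicit (a sketch for illustration, not code from the patch):

#include <algorithm>
#include <cassert>

// Clamp x into [min, max]; note the call site passes op.max_val before
// op.min_val, matching the (max, min) parameter order of device::clip.
float clip_scalar(float x, float max, float min)
{
    return std::min(std::max(min, x), max); // same as std::clamp(x, min, max)
}

int main()
{
    assert(clip_scalar(5.0f, 3.0f, 0.0f) == 3.0f);  // clipped to max
    assert(clip_scalar(-2.0f, 3.0f, 0.0f) == 0.0f); // clipped to min
    return 0;
}
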
@@ -16,7 +16,7 @@ argument gather(hipStream_t stream,
                 std::vector<migraphx::argument> args,
                 int axis)
 {
-    int axis_index = (axis < 0) ? (axis + args[0].get_shape().lens().size()) : axis;
+    auto axis_index = (axis < 0) ? (axis + args[0].get_shape().lens().size()) : axis;
     visit_all(args.back(), args[0])([&](auto output, auto input) {
         std::size_t nelements = output_shape.elements();
         args[1].visit([&](auto indices) {
......
@@ -162,7 +162,10 @@ struct hip_triadd
         device::add(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
         return args.at(3);
     }
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 struct hip_triadd_relu
@@ -178,7 +181,10 @@ struct hip_triadd_relu
         device::add_relu(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
         return args.at(3);
     }
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 struct hip_add_relu
@@ -194,7 +200,10 @@ struct hip_add_relu
         device::add_relu(ctx.get_stream().get(), args.at(2), args.at(0), args.at(1));
         return args.at(2);
     }
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 struct find_add_relu
@@ -285,7 +294,10 @@ struct miopen_conv_bias
     void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
     shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 struct miopen_conv_bias_relu
@@ -332,7 +344,10 @@ struct miopen_conv_bias_relu
     }
     void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
     shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 template <class... Ms>
......
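
The recurring `output_alias` change in this file and the headers below widens the return type: `shapes.size() - 1` is a `std::size_t` expression, and `std::ptrdiff_t` is the natural signed type to carry it while still admitting -1 as a "no alias" result. A stand-alone sketch of the reasoning (illustrative function, not MIGraphX code):

#include <cstddef>
#include <vector>

// Returning int would narrow a std::size_t subtraction; std::ptrdiff_t is
// wide enough for any valid index and can still encode -1 as a sentinel.
std::ptrdiff_t last_index(const std::vector<int>& shapes)
{
    if(shapes.empty())
        return -1; // sentinel: no aliased output
    return static_cast<std::ptrdiff_t>(shapes.size()) - 1;
}
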
@@ -13,11 +13,21 @@ struct context;
 struct miopen_abs
 {
     shared<activation_descriptor> ad;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return gpu::reflect(self.ad.get(), f);
+    }
+
     std::string name() const { return "gpu::abs"; }
     shape compute_shape(const std::vector<shape>& inputs) const;
     argument
     compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };
 } // namespace gpu
......
@@ -13,11 +13,21 @@ struct context;
 struct miopen_batch_norm_inference
 {
     op::batch_norm_inference op;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "gpu::batch_norm_inference"; }
     shape compute_shape(const std::vector<shape>& inputs) const;
     argument
     compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };
 } // namespace gpu
......
#ifndef MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/clip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct hip_clip
{
op::clip op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::clip"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
@@ -14,11 +14,20 @@ struct hip_concat
 {
     op::concat op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "gpu::concat"; }
     shape compute_shape(std::vector<shape> inputs) const;
     argument
     compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };
 } // namespace gpu
......
@@ -13,10 +13,20 @@ struct context;
 struct miopen_contiguous
 {
     op::contiguous op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "gpu::contiguous"; }
     shape compute_shape(const std::vector<shape>& inputs) const;
     argument compute(context&, shape output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };
 } // namespace gpu
......
@@ -31,7 +31,10 @@ struct miopen_convolution
     compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
     shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
     void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };
 } // namespace gpu
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_CLIP_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void clip(hipStream_t stream, const argument& result, const argument& arg1, float max, float min);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
@@ -13,11 +13,21 @@ struct context;
 struct miopen_elu
 {
     shared<activation_descriptor> ad;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return gpu::reflect(self.ad.get(), f);
+    }
+
     std::string name() const { return "gpu::elu"; }
     shape compute_shape(const std::vector<shape>& inputs) const;
     argument
     compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };
 } // namespace gpu
......
@@ -14,11 +14,21 @@ struct context;
 struct hip_gather
 {
     op::gather op;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "gpu::gather"; }
     shape compute_shape(std::vector<shape> inputs) const;
     argument
     compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };
 } // namespace gpu
......