Commit bf6f82d8 authored by Paul

Merge from develop

parents 6a0797e2 b93f5320
......@@ -63,11 +63,11 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
}
}
long long offset = 0;
std::size_t offset = 0;
while(!conflict_queue.empty())
{
live_range* range = conflict_queue.top();
long long iter_offset = range->offset;
std::size_t iter_offset = range->offset;
if(offset > iter_offset)
{
offset = std::max(offset, iter_offset + range->size);
......@@ -97,7 +97,7 @@ void memory_coloring_impl::build()
if(num_of_instrs == 0)
return;
int cur_points = num_of_instrs * 2;
auto cur_points = num_of_instrs * 2;
instruction_ref iter = p_program->end();
instruction_ref begin = p_program->begin();
std::vector<instruction_ref> dead_instrs;
......@@ -193,13 +193,13 @@ void memory_coloring_impl::rewrite()
continue;
std::size_t offset = 0;
if(interval->get_offset() == invalid_offset)
if(interval->get_offset() != invalid_offset)
{
assert(interval->result.bytes() == 0);
offset = interval->get_offset();
}
else
{
offset = interval->get_offset();
assert(interval->result.bytes() == 0);
}
if(is_allocate(ins))
......@@ -207,15 +207,6 @@ void memory_coloring_impl::rewrite()
p_program->replace_instruction(
ins, op::load{ins->get_shape(), offset}, scratch_param);
}
else if(is_literal(ins))
{
#if 0
auto pre = p_program->add_literal(ins->lit);
bool pre_copy = (interval->get_begin() < earliest_end_point);
p_program->replace_instruction(
ins, write_literal{offset, pre_copy}, scratch_param, pre);
#endif
}
}
}
MIGRAPHX_DEBUG(dump("---After rewrite---"));
......
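The allocate() hunk above is the conflict scan of the memory-coloring pass: the candidate offset starts at zero and gets pushed past every already-placed live range it would overlap. A stripped-down sketch of that first-fit idea, with hypothetical names and a plain sorted vector standing in for the real conflict queue:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct placed_range { std::size_t offset; std::size_t size; };

    // Lowest offset at which `size` bytes fit without overlapping any already-placed
    // range; `placed` is assumed to be sorted by offset.
    std::size_t first_fit_offset(const std::vector<placed_range>& placed, std::size_t size)
    {
        std::size_t offset = 0;
        for(const auto& r : placed)
        {
            if(offset + size <= r.offset)
                break;                                    // the gap before r is big enough
            offset = std::max(offset, r.offset + r.size); // otherwise jump past r
        }
        return offset;
    }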
......@@ -21,15 +21,15 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
static const int invalid_offset = -1;
static const std::size_t invalid_offset = std::numeric_limits<std::size_t>::max();
struct live_range
{
int begin; // begin point in the instruction stream.
int end; // end point in the instruction stream.
long long offset; // offset to base pointer of allocated memory trunk.
int vn; // value number that identifies this live_range.
long long size; // size of required memory in bytes
std::size_t begin; // begin point in the instruction stream.
std::size_t end; // end point in the instruction stream.
std::size_t offset; // offset to base pointer of allocated memory trunk.
std::size_t vn; // value number that identifies this live_range.
std::size_t size; // size of required memory in bytes
#ifdef MIGRAPHX_DEBUG_OPT
void dump();
#endif
......@@ -45,9 +45,9 @@ struct live_interval
is_live_on_entry = false;
}
void add_use(int use) { use_points.push_front(use); }
int get_begin() const { return segment.begin; }
int get_end() const { return segment.end; }
void add_use(std::size_t use) { use_points.push_front(use); }
std::size_t get_begin() const { return segment.begin; }
std::size_t get_end() const { return segment.end; }
long long get_offset() const { return segment.offset; }
#ifdef MIGRAPHX_DEBUG_OPT
......@@ -55,9 +55,9 @@ struct live_interval
#endif
live_range segment;
int id;
std::list<int> use_points;
int def_point;
std::size_t id;
std::list<std::size_t> use_points;
std::size_t def_point;
shape result;
bool is_literal;
bool is_live_on_entry;
......@@ -111,8 +111,8 @@ struct memory_coloring_impl
{
if((range1.size == 0) || (range2.size == 0))
return false;
long long end1 = range1.offset + range1.size - 1;
long long end2 = range2.offset + range2.size - 1;
auto end1 = range1.offset + range1.size - 1;
auto end2 = range2.offset + range2.size - 1;
return ((end1 < range2.offset) || (end2 < range1.offset));
}
void verify();
......@@ -125,8 +125,8 @@ struct memory_coloring_impl
{
bool operator()(const interval_ptr i1, const interval_ptr i2) const
{
int len1 = i1->get_end() - i1->get_begin();
int len2 = i2->get_end() - i2->get_begin();
auto len1 = i1->get_end() - i1->get_begin();
auto len2 = i2->get_end() - i2->get_begin();
if(len1 != len2)
{
return (len1 < len2);
......@@ -158,7 +158,7 @@ struct memory_coloring_impl
int num_of_lives;
int max_value_number;
long long required_bytes;
std::size_t required_bytes;
// The earliest program point where a live interval ends.
int earliest_end_point;
// The latest program point where a live interval ends.
......
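One way to read the sentinel change above: once offsets are std::size_t, a literal -1 would only work by wrapping around, so the new code spells the maximum value out. A minimal standalone illustration (not taken from the tree):

    #include <cstddef>
    #include <limits>

    static const std::size_t invalid_offset = std::numeric_limits<std::size_t>::max();

    // -1 converted to an unsigned type produces the same bit pattern, but naming the
    // limit avoids sign-conversion warnings and makes the "no offset yet" intent explicit.
    static_assert(invalid_offset == static_cast<std::size_t>(-1), "same sentinel value");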
......@@ -63,11 +63,16 @@ static void print_program(const program& p, F print_func)
for(auto ins : iterator_for(p))
{
std::string var_name = "@" + std::to_string(count);
std::string var_name;
if(ins->name() == "@param")
{
var_name = any_cast<builtin::param>(ins->get_operator()).parameter;
}
else
{
var_name = "@" + std::to_string(count);
count++;
}
names.emplace(ins, var_name);
// TODO: Use all_of
......@@ -78,17 +83,77 @@ static void print_program(const program& p, F print_func)
}
print_func(ins, names);
count++;
}
}
program::program() : impl(std::make_unique<program_impl>()) {}
program::program(program&&) noexcept = default;
program& program::operator=(program&&) noexcept = default;
program::~program() noexcept = default;
// copy constructor
program::program(const program& p) { assign(p); }
// copy assignment operator
program& program::operator=(program p)
{
std::swap(p.impl, this->impl);
return *this;
}
void program::assign(const program& p)
{
// clean the current program
if(!impl)
{
impl = std::make_unique<program_impl>();
}
else if(!impl->instructions.empty())
{
impl->instructions.clear();
}
impl->ctx = p.impl->ctx;
std::unordered_map<instruction_ref, instruction_ref> ins_map;
for(auto ins : iterator_for(p))
{
instruction_ref copy_ins{};
if(ins->name() == "@literal")
{
auto l = ins->get_literal();
copy_ins = impl->instructions.insert(impl->instructions.end(), instruction{l});
}
else if(ins->name() == "@param")
{
auto&& name = any_cast<builtin::param>(ins->get_operator()).parameter;
auto s = ins->get_shape();
copy_ins = impl->instructions.insert(impl->instructions.end(),
{builtin::param{name}, std::move(s), {}});
}
else if(ins->name() == "@outline")
{
auto s = ins->get_shape();
copy_ins =
impl->instructions.insert(impl->instructions.end(), {builtin::outline{s}, s, {}});
}
else
{
// retrieve its mapped inputs
auto inputs = ins->inputs();
// ensure all inputs have their corresponding copy instructions
assert(std::all_of(
inputs.begin(), inputs.end(), [&](auto i) { return ins_map.count(i) > 0; }));
std::vector<instruction_ref> copy_inputs(inputs.size());
std::transform(inputs.begin(), inputs.end(), copy_inputs.begin(), [&](auto i) {
return ins_map[i];
});
copy_ins = add_instruction(ins->get_operator(), copy_inputs);
}
ins_map[ins] = copy_ins;
}
}
instruction_ref program::add_instruction(const operation& op, std::vector<instruction_ref> args)
{
return insert_instruction(impl->instructions.end(), op, std::move(args));
......
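The new copy assignment above is the copy-and-swap idiom: the parameter is taken by value, so the copy constructor (which calls assign) does the actual copying, and swapping the impl pointers installs the copy with a strong exception guarantee. A self-contained sketch of the same shape, using hypothetical names (widget, widget_impl):

    #include <memory>
    #include <utility>

    struct widget_impl { int state = 0; };

    struct widget
    {
        std::unique_ptr<widget_impl> impl = std::make_unique<widget_impl>();

        widget() = default;
        widget(const widget& w) : impl(std::make_unique<widget_impl>(*w.impl)) {}

        widget& operator=(widget w)        // by value: the copy is already made on entry
        {
            std::swap(w.impl, this->impl); // steal the freshly built state
            return *this;                  // the old impl is freed with the temporary w
        }
    };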
......@@ -22,22 +22,32 @@ bool skip_propogate(instruction_ref ins)
void propagate_constant::apply(program& p) const
{
for(auto i : iterator_for(p))
{
if(i->name() != "@literal")
continue;
if(i->outputs().empty())
continue;
fix([&](auto self, auto ins) {
if(not skip_propogate(ins))
std::unordered_set<instruction_ref> children(ins->outputs().begin(),
ins->outputs().end());
for(auto child : children)
{
auto r = ins->eval();
if(skip_propogate(child))
{
self(child);
continue;
}
auto r = child->eval();
if(not r.empty())
{
assert(r.get_shape() == ins->get_shape());
assert(r.get_shape() == child->get_shape());
auto l = p.add_literal(r.get_shape(), r.data());
p.replace_instruction(ins, l);
return;
self(p.replace_instruction(child, l));
}
}
std::unordered_set<instruction_ref> children(ins->inputs().begin(), ins->inputs().end());
for(auto child : children)
self(child);
})(std::prev(p.end()));
})(i);
}
}
} // namespace MIGRAPHX_INLINE_NS
......
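The reworked pass now starts from each literal and folds forward through its readers, instead of recursing backwards from the end of the program. A condensed sketch of that forward walk, leaving out the skip_propogate filter and the copy of the reader set that the real code makes before mutating the program:

    // Fold every reader of `lit` that evaluates to a constant, then recurse into the
    // literal that replaces it, so chains of constant ops collapse in one pass.
    // (program and instruction_ref are the MIGraphX types used throughout this diff.)
    void propagate_from(program& p, instruction_ref lit)
    {
        for(auto reader : lit->outputs())
        {
            auto r = reader->eval();      // an empty result means it is not constant-foldable
            if(r.empty())
                continue;
            auto folded = p.add_literal(r.get_shape(), r.data());
            propagate_from(p, p.replace_instruction(reader, folded));
        }
    }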
......@@ -48,6 +48,12 @@ struct cpu_batch_norm_inference
{
op::batch_norm_inference op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::batch_norm_inference"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
......@@ -107,6 +113,12 @@ struct cpu_lrn
{
op::lrn op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::lrn"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
......@@ -117,7 +129,7 @@ struct cpu_lrn
int channels = output_shape.lens()[1];
int height = output_shape.lens()[2];
int width = output_shape.lens()[3];
float alphaoverarea = op.alpha / op.size;
float alphaoverarea = op.alpha / float(op.size);
int radius = (op.size - 1) / 2;
par_dfor(n_batch, height, width)([&](int b, int h, int w) {
......@@ -144,6 +156,12 @@ struct cpu_convolution
{
op::convolution op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::convolution"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
......@@ -165,15 +183,15 @@ struct cpu_convolution
output_shape.lens()[2],
output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
const int start_x = i * op.stride[0] - op.padding[0];
const int start_y = j * op.stride[1] - op.padding[1];
const int group_id = w / (wei_n / op.group);
const auto start_x = i * op.stride[0] - op.padding[0];
const auto start_y = j * op.stride[1] - op.padding[1];
const auto group_id = w / (wei_n / op.group);
double acc = 0;
dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
const int in_x = start_x + x;
const int in_y = start_y + y;
const int in_ch = group_id * wei_c + k;
const auto in_x = start_x + x;
const auto in_y = start_y + y;
const auto in_ch = group_id * wei_c + k;
if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
{
acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
......@@ -190,6 +208,12 @@ struct cpu_im2col
{
op::im2col op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
static std::string name() { return "cpu::im2col"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
......@@ -209,10 +233,8 @@ struct cpu_im2col
const std::size_t& stride_h = op.stride[0];
const std::size_t& stride_w = op.stride[1];
int kdiv2_h;
int kdiv2_w;
kdiv2_h = kernel_h / 2;
kdiv2_w = kernel_w / 2;
auto kdiv2_h = kernel_h / 2;
auto kdiv2_w = kernel_w / 2;
// calculate output sizes
const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
const std::size_t col_width = (width - kernel_w + 2 * pad_w) / stride_w + 1;
......@@ -230,8 +252,8 @@ struct cpu_im2col
dfor(channels,
kernel_h,
kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
int idx = iinput + koffset - kdiv2_h;
int jdx = jinput + loffset - kdiv2_w;
auto idx = iinput + koffset - kdiv2_h;
auto jdx = jinput + loffset - kdiv2_w;
col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
? input(0, c, idx, jdx)
: 0;
......@@ -273,6 +295,12 @@ struct cpu_pooling
{
op::pooling op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::pooling_" + Op::name(); }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
......@@ -317,20 +345,35 @@ struct cpu_pooling
}
};
struct cpu_contiguous
struct cpu_op
{
op::contiguous op;
std::string name() const { return "cpu::contiguous"; }
operation op;
std::string name() const { return "cpu::" + op.name(); }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
{
return op.compute(output_shape, std::move(args));
return op.compute(output_shape, args);
}
friend bool operator==(const cpu_op& x, const cpu_op& y) { return x.op == y.op; }
friend bool operator==(const cpu_op& x, const operation& y)
{
if(x.name() != y.name())
return false;
return x == any_cast<cpu_op>(y);
}
friend bool operator==(const operation& x, const cpu_op& y) { return y == x; }
};
struct cpu_pad
{
op::pad op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::contiguous"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
......@@ -354,20 +397,15 @@ struct cpu_pad
}
};
struct cpu_concat
{
op::concat op;
std::string name() const { return "cpu::concat"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
return op.compute(output_shape, std::move(args));
}
};
struct cpu_gemm
{
op::dot op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::dot"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
......@@ -410,162 +448,6 @@ struct cpu_gemm
}
};
struct cpu_gather
{
op::gather op;
std::string name() const { return "cpu::gather"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
return op.compute(output_shape, std::move(args));
}
};
struct identity_op
{
std::string name() const { return "cpu::identity"; }
auto fcn() const
{
return [](auto x) { return x; };
}
};
struct abs_op
{
std::string name() const { return "cpu::abs"; }
auto fcn() const
{
return [](auto x) { return std::abs(make_signed(x)); };
}
};
struct exp_op
{
std::string name() const { return "cpu::exp"; }
auto fcn() const
{
return [](auto x) { return std::exp(x); };
}
};
struct log_op
{
std::string name() const { return "cpu::log"; }
auto fcn() const
{
return [](auto x) { return std::log(x); };
}
};
struct sin_op
{
std::string name() const { return "cpu::sin"; }
auto fcn() const
{
return [](auto x) { return std::sin(x); };
}
};
struct cos_op
{
std::string name() const { return "cpu::cos"; }
auto fcn() const
{
return [](auto x) { return std::cos(x); };
}
};
struct tan_op
{
std::string name() const { return "cpu::tan"; }
auto fcn() const
{
return [](auto x) { return std::tan(x); };
}
};
struct asin_op
{
std::string name() const { return "cpu::asin"; }
auto fcn() const
{
return [](auto x) { return std::asin(x); };
}
};
struct acos_op
{
std::string name() const { return "cpu::acos"; }
auto fcn() const
{
return [](auto x) { return std::acos(x); };
}
};
struct atan_op
{
std::string name() const { return "cpu::atan"; }
auto fcn() const
{
return [](auto x) { return std::atan(x); };
}
};
struct sinh_op
{
std::string name() const { return "cpu::sinh"; }
auto fcn() const
{
return [](auto x) { return std::sinh(x); };
}
};
struct cosh_op
{
std::string name() const { return "cpu::cosh"; }
auto fcn() const
{
return [](auto x) { return std::cosh(x); };
}
};
struct tanh_op
{
std::string name() const { return "cpu::tanh"; }
auto fcn() const
{
return [](auto x) { return std::tanh(x); };
}
};
struct sigmoid_op
{
std::string name() const { return "cpu::sigmoid"; }
auto fcn() const
{
return [](auto x) { return 1.f / (1.f + std::exp(-x)); };
}
};
struct neg_op
{
std::string name() const { return "cpu::neg"; }
auto fcn() const
{
return [](auto x) { return -x; };
}
};
struct relu_op
{
std::string name() const { return "cpu::relu"; }
auto fcn() const
{
return [](auto x) { return std::max(decltype(x){0}, x); };
}
};
struct leaky_relu_op
{
op::leaky_relu op;
......@@ -592,14 +474,42 @@ template <typename Op>
struct cpu_unary
{
Op op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op.op, f);
}
std::string name() const { return op.name(); }
shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs}.has(1);
auto s = inputs.at(0);
if(s.packed())
{
return s;
}
else
{
return {s.type(), s.lens()};
}
}
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
result.visit([&](auto output) {
args[0].visit([&](auto input) {
if(input.get_shape().standard())
{
std::transform(input.begin(), input.end(), output.begin(), op.fcn());
}
else
{
shape_for_each(output.get_shape(), [&](const auto& idx) {
output(idx.begin(), idx.end()) = op.fcn()(input(idx.begin(), idx.end()));
});
}
});
});
......@@ -622,20 +532,20 @@ struct softmax2d
auto nw = input.get_shape().lens()[3];
dfor(nb, nh, nw)([&](std::size_t b, std::size_t i, std::size_t j) {
value_type cmax = std::numeric_limits<value_type>::lowest();
for(int c = 0; c < nc; c++)
for(std::size_t c = 0; c < nc; c++)
{
cmax = std::max(cmax, input(b, c, i, j));
}
for(int c = 0; c < nc; c++)
for(std::size_t c = 0; c < nc; c++)
{
output(b, c, i, j) = std::exp(input(b, c, i, j) - cmax);
}
value_type sum = value_type(0);
for(int c = 0; c < nc; c++)
for(std::size_t c = 0; c < nc; c++)
{
sum += output(b, c, i, j);
}
for(int c = 0; c < nc; c++)
for(std::size_t c = 0; c < nc; c++)
{
output(b, c, i, j) = output(b, c, i, j) / sum;
}
......@@ -648,6 +558,13 @@ struct softmax2d
struct cpu_logsoftmax
{
op::logsoftmax op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::logsoftmax"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
......@@ -714,87 +631,6 @@ struct cpu_logsoftmax
}
};
struct add_op
{
std::string name() const { return "add"; }
auto fcn() const
{
return [](auto x, auto y) { return x + y; };
}
};
struct sub_op
{
std::string name() const { return "sub"; }
auto fcn() const
{
return [](auto x, auto y) { return x - y; };
}
};
struct mul_op
{
std::string name() const { return "mul"; }
auto fcn() const
{
return [](auto x, auto y) { return x * y; };
}
};
struct div_op
{
std::string name() const { return "div"; }
auto fcn() const
{
return [](auto x, auto y) { return x / y; };
}
};
struct max_op
{
std::string name() const { return "max"; }
auto fcn() const
{
return [](auto x, auto y) { return std::max(x, y); };
}
};
struct min_op
{
std::string name() const { return "min"; }
auto fcn() const
{
return [](auto x, auto y) { return std::min(x, y); };
}
};
template <typename Op>
struct cpu_binary
{
Op op;
std::string name() const { return op.name(); }
shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
if(input1.get_shape().packed() and input2.get_shape().packed())
{
std::transform(
input1.begin(), input1.end(), input2.begin(), output.begin(), op.fcn());
}
else
{
shape_for_each(output.get_shape(), [&](const auto& idx) {
output(idx.begin(), idx.end()) =
op.fcn()(input1(idx.begin(), idx.end()), input2(idx.begin(), idx.end()));
});
}
});
return result;
}
};
struct cpu_apply
{
program* prog;
......@@ -814,42 +650,16 @@ struct cpu_apply
void init()
{
apply_map["im2col"] = extend_op<cpu_im2col, op::im2col>();
apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
apply_map["dot"] = extend_op<cpu_gemm, op::dot>();
apply_map["batch_norm_inference"] =
extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
apply_map["dot"] = extend_op<cpu_gemm, op::dot>();
apply_map["elu"] = extend_op<cpu_unary<elu_op>, op::elu>();
apply_map["im2col"] = extend_op<cpu_im2col, op::im2col>();
apply_map["leaky_relu"] = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
apply_map["logsoftmax"] = extend_op<cpu_logsoftmax, op::logsoftmax>();
apply_map["lrn"] = extend_op<cpu_lrn, op::lrn>();
apply_map["contiguous"] = extend_op<cpu_contiguous, op::contiguous>();
apply_map["pad"] = extend_op<cpu_pad, op::pad>();
apply_map["concat"] = extend_op<cpu_concat, op::concat>();
apply_map["gather"] = extend_op<cpu_gather, op::gather>();
apply_map["logsoftmax"] = extend_op<cpu_logsoftmax, op::logsoftmax>();
apply_map["leaky_relu"] = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
apply_map["elu"] = extend_op<cpu_unary<elu_op>, op::elu>();
apply_map["identity"] = simple_op<cpu_unary<identity_op>>();
apply_map["abs"] = simple_op<cpu_unary<abs_op>>();
apply_map["sinh"] = simple_op<cpu_unary<sinh_op>>();
apply_map["cosh"] = simple_op<cpu_unary<cosh_op>>();
apply_map["tanh"] = simple_op<cpu_unary<tanh_op>>();
apply_map["sigmoid"] = simple_op<cpu_unary<sigmoid_op>>();
apply_map["exp"] = simple_op<cpu_unary<exp_op>>();
apply_map["log"] = simple_op<cpu_unary<log_op>>();
apply_map["neg"] = simple_op<cpu_unary<neg_op>>();
apply_map["sin"] = simple_op<cpu_unary<sin_op>>();
apply_map["cos"] = simple_op<cpu_unary<cos_op>>();
apply_map["tan"] = simple_op<cpu_unary<tan_op>>();
apply_map["asin"] = simple_op<cpu_unary<asin_op>>();
apply_map["acos"] = simple_op<cpu_unary<acos_op>>();
apply_map["atan"] = simple_op<cpu_unary<atan_op>>();
apply_map["relu"] = simple_op<cpu_unary<relu_op>>();
apply_map["add"] = simple_op<cpu_binary<add_op>>();
apply_map["sub"] = simple_op<cpu_binary<sub_op>>();
apply_map["mul"] = simple_op<cpu_binary<mul_op>>();
apply_map["div"] = simple_op<cpu_binary<div_op>>();
apply_map["max"] = simple_op<cpu_binary<max_op>>();
apply_map["min"] = simple_op<cpu_binary<min_op>>();
apply_map["softmax"] = simple_op<softmax2d>();
}
......@@ -866,9 +676,18 @@ struct cpu_apply
{
apply_map.at(it->name())(it);
}
else if(is_context_free(it->get_operator()))
{
apply_cpu_op(it);
}
}
}
void apply_cpu_op(instruction_ref ins)
{
prog->replace_instruction(ins, cpu_op{ins->get_operator()}, ins->inputs());
}
template <class T>
void apply_simple_op(instruction_ref ins)
{
......
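Taken together, the new cpu_op wrapper and the is_context_free fallback in cpu_apply mean that context-free reference operators no longer need a dedicated cpu_* struct each (which is why cpu_contiguous, cpu_concat and cpu_gather disappear above): the lowering can wrap the operator as-is. The fallback is the single call already shown in apply_cpu_op; as a usage sketch, with ins assumed to be an instruction_ref visited during lowering:

    // Lower any context-free reference op by wrapping it, instead of maintaining
    // one hand-written cpu_* struct per operator.
    if(is_context_free(ins->get_operator()))
        prog->replace_instruction(ins, cpu_op{ins->get_operator()}, ins->inputs());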
......@@ -32,6 +32,7 @@ add_library(migraphx_device
device/pad.cpp
device/gather.cpp
device/sub.cpp
device/clip.cpp
)
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_clang_tidy_check(migraphx_device)
......@@ -66,6 +67,7 @@ add_library(migraphx_gpu
lrn.cpp
schedule_model.cpp
adjust_allocation.cpp
clip.cpp
)
set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
rocm_clang_tidy_check(migraphx_gpu)
......
......@@ -7,7 +7,7 @@ namespace gpu {
shape miopen_abs::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
check_shapes{inputs, *this}.has(2).packed();
return inputs.at(0);
}
......
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/clip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_clip::compute_shape(std::vector<shape> inputs) const
{
inputs.pop_back();
return op.compute_shape(inputs);
}
argument hip_clip::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::clip(ctx.get_stream().get(), args.back(), args.front(), op.max_val, op.min_val);
return args.back();
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/clip.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void clip(hipStream_t stream,
const argument& result,
const argument& arg1,
const float max,
const float min)
{
nary(stream, result, arg1)(
[max, min](auto x) { return std::min<decltype(x)>(std::max<decltype(x)>(min, x), max); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -16,7 +16,7 @@ argument gather(hipStream_t stream,
std::vector<migraphx::argument> args,
int axis)
{
int axis_index = (axis < 0) ? (axis + args[0].get_shape().lens().size()) : axis;
auto axis_index = (axis < 0) ? (axis + args[0].get_shape().lens().size()) : axis;
visit_all(args.back(), args[0])([&](auto output, auto input) {
std::size_t nelements = output_shape.elements();
args[1].visit([&](auto indices) {
......
......@@ -162,7 +162,10 @@ struct hip_triadd
device::add(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
return args.at(3);
}
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
struct hip_triadd_relu
......@@ -178,7 +181,10 @@ struct hip_triadd_relu
device::add_relu(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
return args.at(3);
}
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
struct hip_add_relu
......@@ -194,7 +200,10 @@ struct hip_add_relu
device::add_relu(ctx.get_stream().get(), args.at(2), args.at(0), args.at(1));
return args.at(2);
}
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
struct find_add_relu
......@@ -285,7 +294,10 @@ struct miopen_conv_bias
void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
struct miopen_conv_bias_relu
......@@ -332,7 +344,10 @@ struct miopen_conv_bias_relu
}
void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
template <class... Ms>
......
......@@ -13,11 +13,21 @@ struct context;
struct miopen_abs
{
shared<activation_descriptor> ad;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return gpu::reflect(self.ad.get(), f);
}
std::string name() const { return "gpu::abs"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
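The reflect additions scattered through these hunks all follow one forwarding pattern: a cpu/gpu wrapper that merely holds an inner operator (or a MIOpen descriptor, as in gpu::abs above) exposes that inner state to the reflection visitor, so the wrapper compares, hashes and prints the same way as the operator it wraps. A hedged illustration of the shape of the idiom, with a made-up wrapper name:

    struct gpu_wrapper_example             // hypothetical wrapper, for illustration only
    {
        op::pooling op;                    // the wrapped reference operator

        // Forward reflection to the wrapped op so equality and printing of the
        // wrapper are driven by the op's own fields.
        template <class Self, class F>
        static auto reflect(Self& self, F f)
        {
            return migraphx::reflect(self.op, f);
        }
    };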
......@@ -13,11 +13,21 @@ struct context;
struct miopen_batch_norm_inference
{
op::batch_norm_inference op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::batch_norm_inference"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
#ifndef MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/clip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct hip_clip
{
op::clip op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::clip"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
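hip_clip follows the convention visible throughout these GPU headers: the last argument is a pre-allocated output buffer, so compute_shape drops it before asking the wrapped op for a shape, compute writes into args.back(), and output_alias reports that the result aliases that trailing argument. Restated as a pattern (a paraphrase of the code above, not a new API):

    shape compute_shape(std::vector<shape> inputs) const
    {
        inputs.pop_back();                 // the last shape is the output buffer, not a real input
        return op.compute_shape(inputs);
    }

    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;          // the result shares storage with that trailing argument
    }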
......@@ -14,11 +14,20 @@ struct hip_concat
{
op::concat op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::concat"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
......@@ -13,10 +13,20 @@ struct context;
struct miopen_contiguous
{
op::contiguous op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::contiguous"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument compute(context&, shape output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
......@@ -31,7 +31,10 @@ struct miopen_convolution
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_CLIP_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void clip(hipStream_t stream, const argument& result, const argument& arg1, float max, float min);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -13,11 +13,21 @@ struct context;
struct miopen_elu
{
shared<activation_descriptor> ad;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return gpu::reflect(self.ad.get(), f);
}
std::string name() const { return "gpu::elu"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
......@@ -14,11 +14,21 @@ struct context;
struct hip_gather
{
op::gather op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::gather"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......