Commit 3a848f0d authored by Paul

Merge branch 'develop' into doc2

parents 64e8e30a d1e945da
@@ -87,13 +87,15 @@ struct program
     instruction_ref add_parameter(std::string name, shape s);
+    instruction_ref add_return(std::vector<instruction_ref> args);
     shape get_parameter_shape(std::string name) const;
     instruction_ref get_parameter(std::string name) const;
     std::unordered_map<std::string, shape> get_parameter_shapes() const;
-    argument eval(parameter_map params) const;
+    std::vector<argument> eval(parameter_map params) const;
     bool has_instruction(instruction_ref ins) const;
@@ -101,7 +103,7 @@ struct program
     instruction_ref begin() const;
     instruction_ref end() const;
-    shape get_shape() const;
+    std::vector<shape> get_output_shapes() const;
     context& get_context() const;
...
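Note: the two hunks above change the program interface so a program can have several outputs: eval now returns std::vector<argument> and get_shape becomes get_output_shapes. A minimal caller-side sketch (not part of the commit; the function, prog, and inputs are hypothetical, and parameter_map is assumed to be the unordered_map typedef eval already uses):

#include <migraphx/program.hpp>

void run(migraphx::program& prog, const migraphx::parameter_map& inputs)
{
    // was: migraphx::argument out = prog.eval(inputs);
    std::vector<migraphx::argument> outputs = prog.eval(inputs);
    // was: migraphx::shape out_shape = prog.get_shape();
    std::vector<migraphx::shape> out_shapes = prog.get_output_shapes();
    // a single-output program keeps the old behaviour by taking the last element
    migraphx::argument result = outputs.back();
    (void)result;
    (void)out_shapes;
}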
@@ -69,11 +69,17 @@ struct schedule_model
     template <typename PrivateDetailTypeErasedT>
     schedule_model& operator=(PrivateDetailTypeErasedT value)
     {
-        if(private_detail_te_handle_mem_var.unique())
-            *private_detail_te_handle_mem_var = std::forward<PrivateDetailTypeErasedT>(value);
-        else if(!private_detail_te_handle_mem_var)
-            private_detail_te_handle_mem_var = std::make_shared<PrivateDetailTypeErasedT>(
-                std::forward<PrivateDetailTypeErasedT>(value));
+        using std::swap;
+        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
+        if(derived and private_detail_te_handle_mem_var.unique())
+        {
+            *derived = std::forward<PrivateDetailTypeErasedT>(value);
+        }
+        else
+        {
+            schedule_model rhs(value);
+            swap(private_detail_te_handle_mem_var, rhs.private_detail_te_handle_mem_var);
+        }
         return *this;
     }
@@ -81,7 +87,7 @@ struct schedule_model
     template <typename PrivateDetailTypeErasedT>
     PrivateDetailTypeErasedT* any_cast()
     {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle())
@@ -92,7 +98,7 @@ struct schedule_model
     template <typename PrivateDetailTypeErasedT>
     const typename std::remove_cv<PrivateDetailTypeErasedT>::type* any_cast() const
     {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<const private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle())
...
@@ -115,11 +115,17 @@ struct target
     template <typename PrivateDetailTypeErasedT>
     target& operator=(PrivateDetailTypeErasedT value)
     {
-        if(private_detail_te_handle_mem_var.unique())
-            *private_detail_te_handle_mem_var = std::forward<PrivateDetailTypeErasedT>(value);
-        else if(!private_detail_te_handle_mem_var)
-            private_detail_te_handle_mem_var = std::make_shared<PrivateDetailTypeErasedT>(
-                std::forward<PrivateDetailTypeErasedT>(value));
+        using std::swap;
+        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
+        if(derived and private_detail_te_handle_mem_var.unique())
+        {
+            *derived = std::forward<PrivateDetailTypeErasedT>(value);
+        }
+        else
+        {
+            target rhs(value);
+            swap(private_detail_te_handle_mem_var, rhs.private_detail_te_handle_mem_var);
+        }
         return *this;
     }
@@ -127,7 +133,7 @@ struct target
     template <typename PrivateDetailTypeErasedT>
     PrivateDetailTypeErasedT* any_cast()
     {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle())
@@ -138,7 +144,7 @@ struct target
     template <typename PrivateDetailTypeErasedT>
     const typename std::remove_cv<PrivateDetailTypeErasedT>::type* any_cast() const
    {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<const private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle())
...
@@ -7,8 +7,15 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
+/// struct to pass in tf options to parser
+struct tf_options
+{
+    bool is_nhwc            = false;
+    unsigned int batch_size = 1;
+};
+
 /// Create a program from a tf pb file (default is nhwc format)
-program parse_tf(const std::string& name, bool is_nhwc);
+program parse_tf(const std::string& name, tf_options = tf_options{});
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
...
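Note: the parse_tf overload taking a single bool is replaced by the tf_options struct above, which also carries a batch-size override. A short sketch of the new call (not part of the commit; the header path is taken from context and "model.pb" is a placeholder):

#include <migraphx/tf.hpp>

int main()
{
    migraphx::tf_options options;
    options.is_nhwc    = true; // keep TensorFlow's NHWC layout
    options.batch_size = 4;    // override the placeholder batch dimension
    auto prog = migraphx::parse_tf("model.pb", options); // "model.pb" is a placeholder path
    (void)prog;
}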
@@ -22,6 +22,9 @@ void instruction::replace(const shape& r)
     result = r;
     for(auto&& ins : output)
     {
+        if(ins->name() == "@return")
+            continue;
+
         assert(ins->name().front() != '@');
         ins->recompute_shape();
     }
@@ -70,6 +73,10 @@ bool instruction::valid() const
     {
         computed = result;
     }
+    else if(op.name() == "@return")
+    {
+        computed = {};
+    }
     else
     {
         try
@@ -81,6 +88,7 @@ bool instruction::valid() const
             return false;
         }
     }
     return result == computed && std::all_of(output.begin(), output.end(), [&](instruction_ref i) {
         return std::find(i->inputs().begin(), i->inputs().end(), *this) != i->inputs().end();
     });
...
@@ -73,8 +73,9 @@ int main(int argc, char const* argv[])
     for(int i = 0; i < 10; i++)
     {
         std::cout << "label: " << static_cast<uint32_t>(labels[i]) << " ----> ";
         m["0"] = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[3072 * i]});
-        auto result = migraphx::gpu::from_gpu(prog.eval(m));
+        auto gpu_result = prog.eval(m).back();
+        auto result = migraphx::gpu::from_gpu(gpu_result);
         std::vector<float> logits;
         result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
         std::vector<float> probs = softmax<float>(logits);
@@ -95,7 +96,7 @@ int main(int argc, char const* argv[])
     {
         std::cout << "label: " << static_cast<uint32_t>(labels[i]) << " ----> ";
         auto input3 = migraphx::argument{s, &ptr[3072 * i]};
-        auto result = prog.eval({{"0", input3}});
+        auto result = prog.eval({{"0", input3}}).back();
         std::vector<float> logits;
         result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
         std::vector<float> probs = softmax<float>(logits);
...
@@ -130,8 +130,9 @@ int main(int argc, char const* argv[])
     for(int i = 0; i < 20; i++)
     {
         std::cout << "label: " << labels[i] << " ----> ";
         m["0"] = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[784 * i]});
-        auto result = migraphx::gpu::from_gpu(prog.eval(m));
+        auto results = prog.eval(m).back();
+        auto result = migraphx::gpu::from_gpu(results);
         std::vector<float> logits;
         result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
         std::vector<float> probs = softmax(logits);
...
@@ -52,7 +52,9 @@ static void print_instruction(std::ostream& os,
         os << ")";
     }
-    os << " -> " << ins->get_shape();
+    // skip return instruction shape
+    if(ins->name() != "@return")
+        os << " -> " << ins->get_shape();
 }

 template <class F>
@@ -147,7 +149,14 @@ void program::assign(const program& p)
             std::transform(inputs.begin(), inputs.end(), copy_inputs.begin(), [&](auto i) {
                 return ins_map[i];
             });
-            copy_ins = add_instruction(ins->get_operator(), copy_inputs);
+            if(ins->name() == "@return")
+            {
+                copy_ins = add_return(copy_inputs);
+            }
+            else
+            {
+                copy_ins = add_instruction(ins->get_operator(), copy_inputs);
+            }
         }
         ins_map[ins] = copy_ins;
@@ -270,6 +279,18 @@ instruction_ref program::add_parameter(std::string name, shape s)
     return impl->instructions.begin();
 }

+instruction_ref program::add_return(std::vector<instruction_ref> args)
+{
+    assert(std::all_of(
+               args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
+           "Argument is not an existing instruction");
+    impl->instructions.push_back({builtin::returns{}, {}, args});
+    auto result = std::prev(impl->instructions.end());
+    instruction::backreference(result);
+    assert(result->valid(begin()));
+    return result;
+}
+
 shape program::get_parameter_shape(std::string name) const
 {
     auto ins = std::find_if(
@@ -334,7 +355,26 @@ std::size_t program::size() const { return impl->instructions.size(); }
 instruction_ref program::begin() const { return impl->instructions.begin(); }
 instruction_ref program::end() const { return impl->instructions.end(); }

-shape program::get_shape() const { return impl->instructions.back().get_shape(); }
+std::vector<shape> program::get_output_shapes() const
+{
+    auto last_ins = impl->instructions.back();
+    if(last_ins.name() == "@return")
+    {
+        auto& output_ins = last_ins.inputs();
+        std::vector<shape> output_shapes;
+        std::transform(output_ins.begin(),
+                       output_ins.end(),
+                       std::back_inserter(output_shapes),
+                       [](auto& ins) { return ins->get_shape(); });
+        return output_shapes;
+    }
+    // The else branch is to provide backward compatibility
+    else
+    {
+        return {last_ins.get_shape()};
+    }
+}

 context& program::get_context() const { return impl->ctx; }
@@ -372,10 +412,10 @@ void program::finalize()
 }

 template <class F>
-argument generic_eval(const program& p,
-                      context& ctx,
-                      std::unordered_map<std::string, argument> params,
-                      F trace)
+std::vector<argument> generic_eval(const program& p,
+                                   context& ctx,
+                                   std::unordered_map<std::string, argument> params,
+                                   F trace)
 {
     assert(p.validate() == p.end());
     std::unordered_map<instruction_ref, argument> results;
@@ -407,6 +447,19 @@ argument generic_eval(const program& p,
         {
             results.emplace(ins, trace(ins, [&] { return argument{ins->get_shape(), nullptr}; }));
         }
+        else if(name == "@return")
+        {
+            std::vector<argument> prog_outputs;
+            std::transform(ins->inputs().begin(),
+                           ins->inputs().end(),
+                           std::back_inserter(prog_outputs),
+                           [&](instruction_ref i) {
+                               assert(results.find(i) != results.end());
+                               return results[i];
+                           });
+            return prog_outputs;
+        }
         else
         {
             values.resize(ins->inputs().size());
@@ -421,10 +474,11 @@ argument generic_eval(const program& p,
         }
         assert(results.find(ins) != results.end());
     }
-    return results.at(std::prev(p.end()));
+    return {results.at(std::prev(p.end()))};
 }

-argument program::eval(std::unordered_map<std::string, argument> params) const
+std::vector<argument> program::eval(parameter_map params) const
 {
     auto& ctx = this->impl->ctx;
 #ifndef NDEBUG
@@ -531,6 +585,11 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
     print_program(*this, [&](auto ins, const auto& names) {
         print_instruction(std::cout, ins, names);
+        // skip return instruction
+        if(ins->name() == "@return")
+            return;
+
         double avg = common_average(ins_vec[ins]);
         double percent = std::ceil(100.0 * avg / total_instruction_time);
         os << ": " << avg << "ms, " << percent << "%";
...
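Note: the new @return builtin and program::add_return above mark one or more instructions as program outputs; generic_eval then returns one argument per @return input instead of only the last instruction's result. A hedged sketch of building a two-output program (not part of the commit; the operator names and shape here are illustrative):

#include <migraphx/program.hpp>
#include <migraphx/operators.hpp>

migraphx::program make_two_output_program()
{
    migraphx::program p;
    migraphx::shape s{migraphx::shape::float_type, {2, 3}};
    auto x   = p.add_parameter("x", s);
    auto y   = p.add_parameter("y", s);
    auto sum = p.add_instruction(migraphx::op::add{}, x, y);
    auto mul = p.add_instruction(migraphx::op::mul{}, x, y);
    p.add_return({sum, mul}); // marks both instructions as program outputs
    // p.get_output_shapes().size() == 2; p.eval(params) yields one argument per output
    return p;
}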
@@ -158,7 +158,7 @@ PYBIND11_MODULE(migraphx, m)
     py::class_<migraphx::program>(m, "program")
         .def("clone", [](migraphx::program& p) { return *(new migraphx::program(p)); })
         .def("get_parameter_shapes", &migraphx::program::get_parameter_shapes)
-        .def("get_shape", &migraphx::program::get_shape)
+        .def("get_output_shapes", &migraphx::program::get_output_shapes)
         .def("compile",
              [](migraphx::program& p, const migraphx::target& t, bool offload_copy) {
                  migraphx::compile_options options;
@@ -173,11 +173,20 @@ PYBIND11_MODULE(migraphx, m)
         .def("__repr__", [](const migraphx::program& p) { return migraphx::to_string(p); });

     m.def("parse_tf",
-          &migraphx::parse_tf,
+          [](const std::string& filename, bool is_nhwc, unsigned int batch_size) {
+              return migraphx::parse_tf(filename, migraphx::tf_options{is_nhwc, batch_size});
+          },
          "Parse tf protobuf (default format is nhwc)",
          py::arg("filename"),
-          py::arg("is_nhwc") = true);
-    m.def("parse_onnx", &migraphx::parse_onnx);
+          py::arg("is_nhwc") = true,
+          py::arg("batch_size") = 1);
+    m.def("parse_onnx",
+          [](const std::string& filename, unsigned int batch_size) {
+              return migraphx::parse_onnx(filename, migraphx::onnx_options{batch_size});
+          },
+          "Parse onnx file",
+          py::arg("filename"),
+          py::arg("batch_size") = 1);
     m.def("get_target", [](const std::string& name) -> migraphx::target {
         if(name == "cpu")
...
@@ -105,6 +105,9 @@ void quantize_fp16(program& prog, const std::vector<std::string>& ins_names)
     std::unordered_map<instruction_ref, instruction_ref> map_fp16;
     for(auto ins : iterator_for(prog))
     {
+        if(ins->name() == "@return")
+            break;
+
         // all indicates every instruction is converted
         if((not contains(ins_names, "all")) and (not contains(ins_names, ins->name())))
         {
@@ -335,6 +338,9 @@ void quantize_int8_impl(program& prog,
     std::unordered_map<instruction_ref, std::size_t> map_ins_index;
     for(auto ins : iterator_for(prog))
     {
+        if(ins->name() == "@return")
+            break;
+
         if(not contains(ins_names, ins->name()))
         {
             continue;
...
@@ -27,6 +27,15 @@ auto conv_const_weights()
                        match::args(match::any(), match::is_constant().bind("w")));
 }

+MIGRAPHX_PRED_MATCHER(args_has_same_ops, instruction_ref ins)
+{
+    if(ins->inputs().empty())
+        return true;
+    return std::all_of(ins->inputs().begin(), ins->inputs().end(), [&](auto j) {
+        return j->get_operator() == ins->inputs().front()->get_operator();
+    });
+}
+
 struct find_mul_conv
 {
     auto matcher() const
@@ -167,6 +176,73 @@ struct find_inner_broadcast
     }
 };

+struct find_concat_unary
+{
+    auto matcher() const
+    {
+        return match::name("concat")(args_has_same_ops(),
+                                     match::arg(0)(match::nargs(1),
+                                                   match::name("relu", "broadcast").bind("x"),
+                                                   match::used_once()));
+    }
+
+    void apply(program& p, match::matcher_result r) const
+    {
+        auto ins  = r.result;
+        auto x    = r.instructions["x"];
+        auto op   = x->get_operator();
+        auto axis = any_cast<op::concat>(ins->get_operator()).axis;
+        // Adjust broadcast lens
+        if(op.name() == "broadcast")
+        {
+            auto b = any_cast<op::broadcast>(op);
+            if(b.axis != axis)
+                return;
+            b.broadcast_lens = ins->get_shape().lens();
+            op   = b;
+            axis = 0;
+        }
+        auto inputs = ins->inputs();
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto i) {
+            return i->inputs().front();
+        });
+        auto concat = p.insert_instruction(ins, op::concat{axis}, inputs);
+        p.replace_instruction(ins, op, concat);
+    }
+};
+
+struct find_concat_binary
+{
+    auto matcher() const
+    {
+        return match::name("concat")(args_has_same_ops(),
+                                     match::arg(0)(match::nargs(2),
+                                                   match::name("add", "multiply").bind("x"),
+                                                   match::used_once()));
+    }
+
+    void apply(program& p, match::matcher_result r) const
+    {
+        auto ins       = r.result;
+        auto x         = r.instructions["x"];
+        auto op        = x->get_operator();
+        auto concat_op = ins->get_operator();
+        auto xinputs   = ins->inputs();
+        std::transform(xinputs.begin(), xinputs.end(), xinputs.begin(), [&](auto i) {
+            return i->inputs().front();
+        });
+        auto yinputs = ins->inputs();
+        std::transform(yinputs.begin(), yinputs.end(), yinputs.begin(), [&](auto i) {
+            return i->inputs().back();
+        });
+        auto xconcat = p.insert_instruction(ins, concat_op, xinputs);
+        auto yconcat = p.insert_instruction(ins, concat_op, yinputs);
+        p.replace_instruction(ins, op, xconcat, yconcat);
+    }
+};
+
 bool axis_equal(const std::vector<std::size_t>& x,
                 const std::vector<std::size_t>& y,
                 std::size_t axis)
@@ -281,7 +357,9 @@ void simplify_algebra::apply(program& p) const
                             find_add_lit_broadcast{},
                             find_add_convs{},
                             find_mul_conv{},
-                            find_mul_add{});
+                            find_mul_add{},
+                            find_concat_unary{},
+                            find_concat_binary{});
         dead_code_elimination{}.apply(p);
     }
 }
...
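Note: find_concat_unary relies on an elementwise unary op commuting with concat, e.g. concat(relu(a), relu(b)) == relu(concat(a, b)), so the pass can hoist one op out of N concat inputs. A standalone C++ check of that identity (plain vectors, not the migraphx API):

#include <algorithm>
#include <cassert>
#include <vector>

int main()
{
    std::vector<float> a{-1.f, 2.f}, b{3.f, -4.f};
    auto relu = [](float v) { return std::max(v, 0.f); };

    std::vector<float> lhs; // concat(relu(a), relu(b))
    for(float v : a) lhs.push_back(relu(v));
    for(float v : b) lhs.push_back(relu(v));

    std::vector<float> rhs; // relu(concat(a, b))
    for(float v : a) rhs.push_back(v);
    for(float v : b) rhs.push_back(v);
    std::transform(rhs.begin(), rhs.end(), rhs.begin(), relu);

    assert(lhs == rhs); // the rewrite preserves results
}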
@@ -4,6 +4,7 @@
 #include <migraphx/dfor.hpp>
 #include <migraphx/op/batch_norm.hpp>
 #include <migraphx/op/convolution.hpp>
+#include <migraphx/op/deconvolution.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/dot.hpp>
 #include <migraphx/op/quant_dot.hpp>
@@ -144,13 +145,14 @@ struct cpu_lrn
         int height = output_shape.lens()[2];
         int width = output_shape.lens()[3];
         float alphaoverarea = op.alpha / float(op.size);
-        int radius = (op.size - 1) / 2;
+        int radius_lower = (op.size - 1) / 2;
+        int radius_upper = op.size / 2 + 1;

         par_dfor(n_batch, height, width)([&](int b, int h, int w) {
             float scale = 0;
             dfor(channels)([&](int c) {
-                auto start = (c - radius) < 0 ? 0 : (c - radius);
-                auto end   = (c + radius) > channels ? channels : (c + radius);
+                auto start = (c - radius_lower) < 0 ? 0 : (c - radius_lower);
+                auto end   = (c + radius_upper) > channels ? channels : (c + radius_upper);
                 for(auto k = start; k < end; ++k)
                 {
                     scale += std::pow(input(b, k, h, w), 2);
@@ -220,6 +222,67 @@ struct cpu_convolution
     }
 };

+template <class Op>
+struct cpu_deconvolution
+{
+    Op op;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
+    std::string name() const { return "cpu::" + op.name(); }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+
+    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    {
+        argument result{output_shape};
+        visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
+            using type = typename decltype(output)::value_type;
+            std::fill(output.begin(), output.end(), type{0});
+
+            auto out_lens = output_shape.lens();
+            auto out_h    = out_lens[2];
+            auto out_w    = out_lens[3];
+
+            auto in   = input.get_shape().lens();
+            auto in_n = in[0];
+            auto in_c = in[1];
+            auto in_h = in[2];
+            auto in_w = in[3];
+
+            auto wei   = weights.get_shape().lens();
+            auto wei_n = wei[0];
+            auto wei_c = wei[1];
+            auto wei_h = wei[2];
+            auto wei_w = wei[3];
+
+            par_dfor(in_n, wei_c)([&](std::size_t o, std::size_t k) {
+                dfor(in_c, in_h, in_w, wei_h, wei_w)(
+                    [&](std::size_t w, std::size_t i, std::size_t j, std::size_t x, std::size_t y) {
+                        const int start_x   = i * op.stride[0] - op.padding[0];
+                        const int start_y   = j * op.stride[1] - op.padding[1];
+                        const int out_x     = start_x + x * op.dilation[0];
+                        const int out_y     = start_y + y * op.dilation[1];
+                        const auto group_id = w / (wei_n / op.group);
+                        const auto in_ch    = group_id * wei_c + k;
+                        if(out_x >= 0 && out_x < out_h && out_y >= 0 && out_y < out_w)
+                        {
+                            output(o, in_ch, out_x, out_y) +=
+                                input(o, w, i, j) * weights(w, k, x, y);
+                        }
+                    });
+            });
+        });
+        return result;
+    }
+};
+
 struct cpu_im2col
 {
     op::im2col op;
@@ -598,9 +661,10 @@ struct cpu_softmax
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
         auto batch_lens = output_shape.lens();
-        std::size_t n_dims = batch_lens[op.axis];
-        batch_lens[op.axis] = 1;
+        int64_t tuned_axis = (op.axis < 0) ? op.axis + args[0].get_shape().lens().size() : op.axis;
+        std::size_t n_dims = batch_lens[tuned_axis];
+        batch_lens[tuned_axis] = 1;
         shape batch_shape{shape::int32_type, batch_lens};
         visit_all(result, args[0])([&](auto output, auto input) {
@@ -612,26 +676,26 @@ struct cpu_softmax
                 auto idx = batch_shape.multi(i);
                 for(std::size_t j = 0; j < n_dims; ++j)
                 {
-                    idx[op.axis] = j;
+                    idx[tuned_axis] = j;
                     batch_max[i] = std::max(batch_max[i], input(idx.begin(), idx.end()));
                 }

                 for(std::size_t j = 0; j < n_dims; ++j)
                 {
-                    idx[op.axis] = j;
+                    idx[tuned_axis] = j;
                     std::size_t index = output_shape.index(idx);
                     output[index] = std::exp(input[index] - batch_max[i]);
                 }

                 for(std::size_t j = 0; j < n_dims; ++j)
                 {
-                    idx[op.axis] = j;
+                    idx[tuned_axis] = j;
                     batch_sum[i] += output(idx.begin(), idx.end());
                 }

                 for(std::size_t j = 0; j < n_dims; ++j)
                 {
-                    idx[op.axis] = j;
+                    idx[tuned_axis] = j;
                     output(idx.begin(), idx.end()) =
                         op.output()(output(idx.begin(), idx.end()), batch_sum[i]);
                 }
@@ -664,8 +728,10 @@ struct cpu_apply
         apply_map["batch_norm_inference"] =
             extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
         apply_map["convolution"] = extend_op<cpu_convolution<op::convolution>, op::convolution>();
-        apply_map["dot"]       = extend_op<cpu_gemm, op::dot>();
-        apply_map["quant_dot"] = extend_op<cpu_quant_gemm, op::quant_dot>();
+        apply_map["deconvolution"] =
+            extend_op<cpu_deconvolution<op::deconvolution>, op::deconvolution>();
+        apply_map["dot"]       = extend_op<cpu_gemm, op::dot>();
+        apply_map["quant_dot"] = extend_op<cpu_quant_gemm, op::quant_dot>();
         apply_map["quant_convolution"] =
             extend_op<cpu_convolution<op::quant_convolution>, op::quant_convolution>();
         apply_map["elu"] = extend_op<cpu_unary<elu_op>, op::elu>();
...
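Note: the cpu_deconvolution kernel above scatters each input element into the output at index i*stride - padding + k*dilation, the transpose of the convolution gather. A standalone 1-D sketch of that rule (plain C++, sizes and values are illustrative, not the migraphx kernel):

#include <iostream>
#include <vector>

int main()
{
    const std::vector<float> input{1.f, 2.f, 3.f};
    const std::vector<float> weights{1.f, 1.f}; // kernel of size 2
    const int stride = 2, padding = 0, dilation = 1;
    const int out_size = (static_cast<int>(input.size()) - 1) * stride +
                         (static_cast<int>(weights.size()) - 1) * dilation + 1 - 2 * padding;

    std::vector<float> output(out_size, 0.f);
    for(std::size_t i = 0; i < input.size(); ++i)
        for(std::size_t x = 0; x < weights.size(); ++x)
        {
            const int out_x = static_cast<int>(i) * stride - padding + static_cast<int>(x) * dilation;
            if(out_x >= 0 && out_x < out_size)
                output[out_x] += input[i] * weights[x]; // scatter-add, as in the 4-D kernel above
        }

    for(float v : output) std::cout << v << ' '; // prints: 1 1 2 2 3 3
    std::cout << '\n';
}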
@@ -12,6 +12,7 @@ endif()
 add_library(migraphx_device
     device/acos.cpp
+    device/acosh.cpp
     device/add.cpp
     device/add_clip.cpp
     device/add_relu.cpp
@@ -20,7 +21,9 @@ add_library(migraphx_device
     device/argmax.cpp
     device/argmin.cpp
     device/asin.cpp
+    device/asinh.cpp
     device/atan.cpp
+    device/atanh.cpp
     device/ceil.cpp
     device/clip.cpp
     device/concat.cpp
@@ -43,10 +46,12 @@ add_library(migraphx_device
     device/mul_add_relu.cpp
     device/pad.cpp
     device/pow.cpp
+    device/prelu.cpp
     device/reduce_max.cpp
     device/reduce_mean.cpp
     device/reduce_min.cpp
     device/reduce_sum.cpp
+    device/reduce_prod.cpp
     device/relu.cpp
     device/round.cpp
     device/rsqrt.cpp
@@ -79,6 +84,7 @@ add_library(migraphx_gpu
     lowering.cpp
     pooling.cpp
     convolution.cpp
+    deconvolution.cpp
     quant_convolution.cpp
     softmax.cpp
     logsoftmax.cpp
...
@@ -14,7 +14,9 @@ shape hip_argmax::compute_shape(const std::vector<shape>& inputs) const
 argument hip_argmax::compute(context& ctx, const shape&, const std::vector<argument>& args) const
 {
-    device::argmax(ctx.get_stream().get(), args.back(), args.front(), op.axis);
+    auto n_dim = args.front().get_shape().lens().size();
+    int64_t tuned_axis = (op.axis < 0) ? op.axis + n_dim : op.axis;
+    device::argmax(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
     return args.back();
 }
...
@@ -14,7 +14,9 @@ shape hip_argmin::compute_shape(const std::vector<shape>& inputs) const
 argument hip_argmin::compute(context& ctx, const shape&, const std::vector<argument>& args) const
 {
-    device::argmin(ctx.get_stream().get(), args.back(), args.front(), op.axis);
+    auto n_dim = args.front().get_shape().lens().size();
+    int64_t tuned_axis = (op.axis < 0) ? op.axis + n_dim : op.axis;
+    device::argmin(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
     return args.back();
 }
...
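Note: hip_argmax/hip_argmin (and cpu_softmax earlier) now accept negative axes by normalizing against the tensor rank. A standalone check of that one-line rule (not migraphx code, names are illustrative):

#include <cassert>
#include <cstdint>

int64_t tune_axis(int64_t axis, int64_t n_dim) { return axis < 0 ? axis + n_dim : axis; }

int main()
{
    assert(tune_axis(-1, 4) == 3); // last axis of a 4-D tensor
    assert(tune_axis(-4, 4) == 0); // first axis
    assert(tune_axis(2, 4) == 2);  // non-negative axes are unchanged
}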
#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_deconvolution::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(4).standard();
return op.compute_shape({inputs.at(0), inputs.at(1)});
}
argument miopen_deconvolution::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
auto x_desc = make_tensor(args[0].get_shape());
auto w_desc = make_tensor(args[1].get_shape());
auto y_desc = make_tensor(output_shape);
float alpha = 1;
float beta = 0;
auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
&alpha,
x_desc.get(),
args[0].implicit(),
w_desc.get(),
args[1].implicit(),
cd.get(),
algo,
&beta,
y_desc.get(),
args[3].implicit(),
args[2].implicit(),
args[2].get_shape().bytes());
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("Running deconvolution failed");
return args[3];
}
shape miopen_deconvolution::compile(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(inputs[0]);
auto w_desc = make_tensor(inputs[1]);
auto y_desc = make_tensor(output_shape);
std::size_t workspace_size = 0;
miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]));
auto w = to_gpu(generate_argument(inputs[1]));
auto y = allocate_gpu(output_shape);
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
miopenConvAlgoPerf_t perf;
auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("Find deconvolution failed");
handle = ctx.get_stream().get_miopen();
algo = perf.fwd_algo;
return shape{shape::int8_type, {perf.memory}};
}
void miopen_deconvolution::finalize(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
if(handle == ctx.get_stream().get_miopen())
return;
// Check that workspace hasn't changed
auto size = inputs.at(2).bytes();
auto ws = compile(ctx, output_shape, std::move(inputs));
if(ws.bytes() > size)
MIGRAPHX_THROW("Workspace has changed during finalization.");
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -9,7 +9,7 @@ namespace device {
 void acos(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return ::acos(to_hip_type(x)); });
+    nary(stream, result, arg)([](auto x) __device__ { return ::acos(to_hip_type(x)); });
 }

 } // namespace device
...
#include <migraphx/gpu/device/acosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void acosh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::acosh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx