Commit 3a848f0d authored by Paul

Merge branch 'develop' into doc2

parents 64e8e30a d1e945da
......@@ -87,13 +87,15 @@ struct program
instruction_ref add_parameter(std::string name, shape s);
instruction_ref add_return(std::vector<instruction_ref> args);
shape get_parameter_shape(std::string name) const;
instruction_ref get_parameter(std::string name) const;
std::unordered_map<std::string, shape> get_parameter_shapes() const;
argument eval(parameter_map params) const;
std::vector<argument> eval(parameter_map params) const;
bool has_instruction(instruction_ref ins) const;
......@@ -101,7 +103,7 @@ struct program
instruction_ref begin() const;
instruction_ref end() const;
shape get_shape() const;
std::vector<shape> get_output_shapes() const;
context& get_context() const;
......
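With this merge a program can carry more than one output: eval now returns a std::vector<argument> and get_output_shapes replaces the single-shape get_shape. A minimal caller-side sketch of the new API; the header path and the dummy inputs are assumptions, not part of the commit:

#include <cassert>
#include <vector>
#include <migraphx/program.hpp>  // header path assumed
#include <migraphx/generate.hpp>

// Sketch only: evaluate a program and read every output through the new API.
void run(migraphx::program& prog)
{
    migraphx::parameter_map params;
    for(auto&& p : prog.get_parameter_shapes())
        params[p.first] = migraphx::generate_argument(p.second); // dummy inputs

    std::vector<migraphx::argument> outputs = prog.eval(params);        // was a single argument
    std::vector<migraphx::shape> shapes     = prog.get_output_shapes(); // was get_shape()
    assert(outputs.size() == shapes.size());
}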
......@@ -69,11 +69,17 @@ struct schedule_model
template <typename PrivateDetailTypeErasedT>
schedule_model& operator=(PrivateDetailTypeErasedT value)
{
if(private_detail_te_handle_mem_var.unique())
*private_detail_te_handle_mem_var = std::forward<PrivateDetailTypeErasedT>(value);
else if(!private_detail_te_handle_mem_var)
private_detail_te_handle_mem_var = std::make_shared<PrivateDetailTypeErasedT>(
std::forward<PrivateDetailTypeErasedT>(value));
using std::swap;
auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
if(derived and private_detail_te_handle_mem_var.unique())
{
*derived = std::forward<PrivateDetailTypeErasedT>(value);
}
else
{
schedule_model rhs(value);
swap(private_detail_te_handle_mem_var, rhs.private_detail_te_handle_mem_var);
}
return *this;
}
......@@ -81,7 +87,7 @@ struct schedule_model
template <typename PrivateDetailTypeErasedT>
PrivateDetailTypeErasedT* any_cast()
{
return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
return this->type_id() == typeid(PrivateDetailTypeErasedT)
? std::addressof(static_cast<private_detail_te_handle_type<
typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
private_detail_te_get_handle())
......@@ -92,7 +98,7 @@ struct schedule_model
template <typename PrivateDetailTypeErasedT>
const typename std::remove_cv<PrivateDetailTypeErasedT>::type* any_cast() const
{
return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
return this->type_id() == typeid(PrivateDetailTypeErasedT)
? std::addressof(static_cast<const private_detail_te_handle_type<
typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
private_detail_te_get_handle())
......
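Both type-erased wrappers touched here (schedule_model, and target further down) switch their templated operator= to copy-and-swap: the handle is reused only when it is uniquely owned and already stores the assigned type (checked through the new type_id-based any_cast), otherwise a fresh wrapper is constructed and its handle swapped in. A stripped-down illustration of the idiom using a hypothetical holder type, not MIGraphX code:

#include <memory>
#include <utility>

// Hypothetical holder, only to show the assignment pattern adopted above.
class holder
{
    std::shared_ptr<int> handle;

public:
    explicit holder(int v) : handle(std::make_shared<int>(v)) {}

    holder& operator=(int v)
    {
        using std::swap;
        if(handle and handle.unique())
        {
            *handle = v;              // fast path: sole owner, mutate in place
        }
        else
        {
            holder rhs(v);            // otherwise build a replacement...
            swap(handle, rhs.handle); // ...and swap; rhs releases the old state
        }
        return *this;
    }
};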
......@@ -115,11 +115,17 @@ struct target
template <typename PrivateDetailTypeErasedT>
target& operator=(PrivateDetailTypeErasedT value)
{
if(private_detail_te_handle_mem_var.unique())
*private_detail_te_handle_mem_var = std::forward<PrivateDetailTypeErasedT>(value);
else if(!private_detail_te_handle_mem_var)
private_detail_te_handle_mem_var = std::make_shared<PrivateDetailTypeErasedT>(
std::forward<PrivateDetailTypeErasedT>(value));
using std::swap;
auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
if(derived and private_detail_te_handle_mem_var.unique())
{
*derived = std::forward<PrivateDetailTypeErasedT>(value);
}
else
{
target rhs(value);
swap(private_detail_te_handle_mem_var, rhs.private_detail_te_handle_mem_var);
}
return *this;
}
......@@ -127,7 +133,7 @@ struct target
template <typename PrivateDetailTypeErasedT>
PrivateDetailTypeErasedT* any_cast()
{
return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
return this->type_id() == typeid(PrivateDetailTypeErasedT)
? std::addressof(static_cast<private_detail_te_handle_type<
typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
private_detail_te_get_handle())
......@@ -138,7 +144,7 @@ struct target
template <typename PrivateDetailTypeErasedT>
const typename std::remove_cv<PrivateDetailTypeErasedT>::type* any_cast() const
{
return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
return this->type_id() == typeid(PrivateDetailTypeErasedT)
? std::addressof(static_cast<const private_detail_te_handle_type<
typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
private_detail_te_get_handle())
......
......@@ -7,8 +7,15 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
/// struct to pass in tf options to parser
struct tf_options
{
bool is_nhwc = false;
unsigned int batch_size = 1;
};
/// Create a program from a tf pb file (default is nhwc format)
program parse_tf(const std::string& name, bool is_nhwc);
program parse_tf(const std::string& name, tf_options = tf_options{});
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
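parse_tf now takes a tf_options struct (defaulted) instead of a bare is_nhwc flag, so the batch size can be set alongside the layout. A hedged usage sketch; the header path and file name are placeholders:

#include <migraphx/tf.hpp> // header path assumed

// Sketch only: parse a TF protobuf with explicit options; omitting the second
// argument falls back to the tf_options defaults shown in the header above.
migraphx::program load_tf_model()
{
    migraphx::tf_options options;
    options.is_nhwc    = true; // TensorFlow graphs commonly use NHWC
    options.batch_size = 4;    // override the placeholder batch dimension
    return migraphx::parse_tf("model.pb", options); // file name is a placeholder
}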
......@@ -22,6 +22,9 @@ void instruction::replace(const shape& r)
result = r;
for(auto&& ins : output)
{
if(ins->name() == "@return")
continue;
assert(ins->name().front() != '@');
ins->recompute_shape();
}
......@@ -70,6 +73,10 @@ bool instruction::valid() const
{
computed = result;
}
else if(op.name() == "@return")
{
computed = {};
}
else
{
try
......@@ -81,6 +88,7 @@ bool instruction::valid() const
return false;
}
}
return result == computed && std::all_of(output.begin(), output.end(), [&](instruction_ref i) {
return std::find(i->inputs().begin(), i->inputs().end(), *this) != i->inputs().end();
});
......
......@@ -73,8 +73,9 @@ int main(int argc, char const* argv[])
for(int i = 0; i < 10; i++)
{
std::cout << "label: " << static_cast<uint32_t>(labels[i]) << " ----> ";
m["0"] = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[3072 * i]});
auto result = migraphx::gpu::from_gpu(prog.eval(m));
m["0"] = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[3072 * i]});
auto gpu_result = prog.eval(m).back();
auto result = migraphx::gpu::from_gpu(gpu_result);
std::vector<float> logits;
result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
std::vector<float> probs = softmax<float>(logits);
......@@ -95,7 +96,7 @@ int main(int argc, char const* argv[])
{
std::cout << "label: " << static_cast<uint32_t>(labels[i]) << " ----> ";
auto input3 = migraphx::argument{s, &ptr[3072 * i]};
auto result = prog.eval({{"0", input3}});
auto result = prog.eval({{"0", input3}}).back();
std::vector<float> logits;
result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
std::vector<float> probs = softmax<float>(logits);
......
......@@ -130,8 +130,9 @@ int main(int argc, char const* argv[])
for(int i = 0; i < 20; i++)
{
std::cout << "label: " << labels[i] << " ----> ";
m["0"] = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[784 * i]});
auto result = migraphx::gpu::from_gpu(prog.eval(m));
m["0"] = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[784 * i]});
auto results = prog.eval(m).back();
auto result = migraphx::gpu::from_gpu(results);
std::vector<float> logits;
result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
std::vector<float> probs = softmax(logits);
......
......@@ -52,7 +52,9 @@ static void print_instruction(std::ostream& os,
os << ")";
}
os << " -> " << ins->get_shape();
// skip return instruction shape
if(ins->name() != "@return")
os << " -> " << ins->get_shape();
}
template <class F>
......@@ -147,7 +149,14 @@ void program::assign(const program& p)
std::transform(inputs.begin(), inputs.end(), copy_inputs.begin(), [&](auto i) {
return ins_map[i];
});
copy_ins = add_instruction(ins->get_operator(), copy_inputs);
if(ins->name() == "@return")
{
copy_ins = add_return(copy_inputs);
}
else
{
copy_ins = add_instruction(ins->get_operator(), copy_inputs);
}
}
ins_map[ins] = copy_ins;
......@@ -270,6 +279,18 @@ instruction_ref program::add_parameter(std::string name, shape s)
return impl->instructions.begin();
}
instruction_ref program::add_return(std::vector<instruction_ref> args)
{
assert(std::all_of(
args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
"Argument is not an existing instruction");
impl->instructions.push_back({builtin::returns{}, {}, args});
auto result = std::prev(impl->instructions.end());
instruction::backreference(result);
assert(result->valid(begin()));
return result;
}
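add_return appends the builtin @return instruction whose inputs become the program's outputs; they are what get_output_shapes reports and what the vector returned by eval contains. A small sketch of building a two-output program (the shape and header path are illustrative assumptions):

#include <cassert>
#include <migraphx/program.hpp> // header path assumed

// Sketch only, not part of the commit: a program whose @return carries two outputs.
migraphx::program make_two_output_program()
{
    migraphx::program p;
    migraphx::shape s{migraphx::shape::float_type, {4, 64}};
    auto x = p.add_parameter("x", s);
    auto y = p.add_parameter("y", s);
    p.add_return({x, y});                      // last instruction is now @return
    assert(p.get_output_shapes().size() == 2); // one shape per returned value
    return p;
}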
shape program::get_parameter_shape(std::string name) const
{
auto ins = std::find_if(
......@@ -334,7 +355,26 @@ std::size_t program::size() const { return impl->instructions.size(); }
instruction_ref program::begin() const { return impl->instructions.begin(); }
instruction_ref program::end() const { return impl->instructions.end(); }
shape program::get_shape() const { return impl->instructions.back().get_shape(); }
std::vector<shape> program::get_output_shapes() const
{
auto last_ins = impl->instructions.back();
if(last_ins.name() == "@return")
{
auto& output_ins = last_ins.inputs();
std::vector<shape> output_shapes;
std::transform(output_ins.begin(),
output_ins.end(),
std::back_inserter(output_shapes),
[](auto& ins) { return ins->get_shape(); });
return output_shapes;
}
// The else branch is to provide backward compatibility
else
{
return {last_ins.get_shape()};
}
}
context& program::get_context() const { return impl->ctx; }
......@@ -372,10 +412,10 @@ void program::finalize()
}
template <class F>
argument generic_eval(const program& p,
context& ctx,
std::unordered_map<std::string, argument> params,
F trace)
std::vector<argument> generic_eval(const program& p,
context& ctx,
std::unordered_map<std::string, argument> params,
F trace)
{
assert(p.validate() == p.end());
std::unordered_map<instruction_ref, argument> results;
......@@ -407,6 +447,19 @@ argument generic_eval(const program& p,
{
results.emplace(ins, trace(ins, [&] { return argument{ins->get_shape(), nullptr}; }));
}
else if(name == "@return")
{
std::vector<argument> prog_outputs;
std::transform(ins->inputs().begin(),
ins->inputs().end(),
std::back_inserter(prog_outputs),
[&](instruction_ref i) {
assert(results.find(i) != results.end());
return results[i];
});
return prog_outputs;
}
else
{
values.resize(ins->inputs().size());
......@@ -421,10 +474,11 @@ argument generic_eval(const program& p,
}
assert(results.find(ins) != results.end());
}
return results.at(std::prev(p.end()));
return {results.at(std::prev(p.end()))};
}
argument program::eval(std::unordered_map<std::string, argument> params) const
std::vector<argument> program::eval(parameter_map params) const
{
auto& ctx = this->impl->ctx;
#ifndef NDEBUG
......@@ -531,6 +585,11 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
print_program(*this, [&](auto ins, const auto& names) {
print_instruction(std::cout, ins, names);
// skip return instruction
if(ins->name() == "@return")
return;
double avg = common_average(ins_vec[ins]);
double percent = std::ceil(100.0 * avg / total_instruction_time);
os << ": " << avg << "ms, " << percent << "%";
......
......@@ -158,7 +158,7 @@ PYBIND11_MODULE(migraphx, m)
py::class_<migraphx::program>(m, "program")
.def("clone", [](migraphx::program& p) { return *(new migraphx::program(p)); })
.def("get_parameter_shapes", &migraphx::program::get_parameter_shapes)
.def("get_shape", &migraphx::program::get_shape)
.def("get_output_shapes", &migraphx::program::get_output_shapes)
.def("compile",
[](migraphx::program& p, const migraphx::target& t, bool offload_copy) {
migraphx::compile_options options;
......@@ -173,11 +173,20 @@ PYBIND11_MODULE(migraphx, m)
.def("__repr__", [](const migraphx::program& p) { return migraphx::to_string(p); });
m.def("parse_tf",
&migraphx::parse_tf,
[](const std::string& filename, bool is_nhwc, unsigned int batch_size) {
return migraphx::parse_tf(filename, migraphx::tf_options{is_nhwc, batch_size});
},
"Parse tf protobuf (default format is nhwc)",
py::arg("filename"),
py::arg("is_nhwc") = true);
m.def("parse_onnx", &migraphx::parse_onnx);
py::arg("is_nhwc") = true,
py::arg("batch_size") = 1);
m.def("parse_onnx",
[](const std::string& filename, unsigned int batch_size) {
return migraphx::parse_onnx(filename, migraphx::onnx_options{batch_size});
},
"Parse onnx file",
py::arg("filename"),
py::arg("batch_size") = 1);
m.def("get_target", [](const std::string& name) -> migraphx::target {
if(name == "cpu")
......
......@@ -105,6 +105,9 @@ void quantize_fp16(program& prog, const std::vector<std::string>& ins_names)
std::unordered_map<instruction_ref, instruction_ref> map_fp16;
for(auto ins : iterator_for(prog))
{
if(ins->name() == "@return")
break;
// all indicates every instruction is converted
if((not contains(ins_names, "all")) and (not contains(ins_names, ins->name())))
{
......@@ -335,6 +338,9 @@ void quantize_int8_impl(program& prog,
std::unordered_map<instruction_ref, std::size_t> map_ins_index;
for(auto ins : iterator_for(prog))
{
if(ins->name() == "@return")
break;
if(not contains(ins_names, ins->name()))
{
continue;
......
......@@ -27,6 +27,15 @@ auto conv_const_weights()
match::args(match::any(), match::is_constant().bind("w")));
}
MIGRAPHX_PRED_MATCHER(args_has_same_ops, instruction_ref ins)
{
if(ins->inputs().empty())
return true;
return std::all_of(ins->inputs().begin(), ins->inputs().end(), [&](auto j) {
return j->get_operator() == ins->inputs().front()->get_operator();
});
}
struct find_mul_conv
{
auto matcher() const
......@@ -167,6 +176,73 @@ struct find_inner_broadcast
}
};
struct find_concat_unary
{
auto matcher() const
{
return match::name("concat")(args_has_same_ops(),
match::arg(0)(match::nargs(1),
match::name("relu", "broadcast").bind("x"),
match::used_once()));
}
void apply(program& p, match::matcher_result r) const
{
auto ins = r.result;
auto x = r.instructions["x"];
auto op = x->get_operator();
auto axis = any_cast<op::concat>(ins->get_operator()).axis;
// Adjust broadcast lens
if(op.name() == "broadcast")
{
auto b = any_cast<op::broadcast>(op);
if(b.axis != axis)
return;
b.broadcast_lens = ins->get_shape().lens();
op = b;
axis = 0;
}
auto inputs = ins->inputs();
std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto i) {
return i->inputs().front();
});
auto concat = p.insert_instruction(ins, op::concat{axis}, inputs);
p.replace_instruction(ins, op, concat);
}
};
struct find_concat_binary
{
auto matcher() const
{
return match::name("concat")(args_has_same_ops(),
match::arg(0)(match::nargs(2),
match::name("add", "multiply").bind("x"),
match::used_once()));
}
void apply(program& p, match::matcher_result r) const
{
auto ins = r.result;
auto x = r.instructions["x"];
auto op = x->get_operator();
auto concat_op = ins->get_operator();
auto xinputs = ins->inputs();
std::transform(xinputs.begin(), xinputs.end(), xinputs.begin(), [&](auto i) {
return i->inputs().front();
});
auto yinputs = ins->inputs();
std::transform(yinputs.begin(), yinputs.end(), yinputs.begin(), [&](auto i) {
return i->inputs().back();
});
auto xconcat = p.insert_instruction(ins, concat_op, xinputs);
auto yconcat = p.insert_instruction(ins, concat_op, yinputs);
p.replace_instruction(ins, op, xconcat, yconcat);
}
};
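Informally, the two matchers above hoist a concat over identical producers: concat(relu(a), relu(b)) is rewritten to relu(concat(a, b)) (the broadcast variant also remaps the axis and lengths), and concat(add(a0, b0), add(a1, b1)) becomes add(concat(a0, a1), concat(b0, b1)), so the element-wise operation runs once over the concatenated tensor instead of once per input.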
bool axis_equal(const std::vector<std::size_t>& x,
const std::vector<std::size_t>& y,
std::size_t axis)
......@@ -281,7 +357,9 @@ void simplify_algebra::apply(program& p) const
find_add_lit_broadcast{},
find_add_convs{},
find_mul_conv{},
find_mul_add{});
find_mul_add{},
find_concat_unary{},
find_concat_binary{});
dead_code_elimination{}.apply(p);
}
}
......
......@@ -4,6 +4,7 @@
#include <migraphx/dfor.hpp>
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
......@@ -144,13 +145,14 @@ struct cpu_lrn
int height = output_shape.lens()[2];
int width = output_shape.lens()[3];
float alphaoverarea = op.alpha / float(op.size);
int radius = (op.size - 1) / 2;
int radius_lower = (op.size - 1) / 2;
int radius_upper = op.size / 2 + 1;
par_dfor(n_batch, height, width)([&](int b, int h, int w) {
float scale = 0;
dfor(channels)([&](int c) {
auto start = (c - radius) < 0 ? 0 : (c - radius);
auto end = (c + radius) > channels ? channels : (c + radius);
auto start = (c - radius_lower) < 0 ? 0 : (c - radius_lower);
auto end = (c + radius_upper) > channels ? channels : (c + radius_upper);
for(auto k = start; k < end; ++k)
{
scale += std::pow(input(b, k, h, w), 2);
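Splitting the radius makes the LRN window cover op.size channels for both odd and even sizes: with op.size = 5, radius_lower = 2 and radius_upper = 3 give the half-open window [c-2, c+3), i.e. five channels, where the old symmetric radius of 2 produced [c-2, c+2) and dropped one; with op.size = 4 the window is [c-1, c+3), four channels.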
......@@ -220,6 +222,67 @@ struct cpu_convolution
}
};
template <class Op>
struct cpu_deconvolution
{
Op op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::" + op.name(); }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
using type = typename decltype(output)::value_type;
std::fill(output.begin(), output.end(), type{0});
auto out_lens = output_shape.lens();
auto out_h = out_lens[2];
auto out_w = out_lens[3];
auto in = input.get_shape().lens();
auto in_n = in[0];
auto in_c = in[1];
auto in_h = in[2];
auto in_w = in[3];
auto wei = weights.get_shape().lens();
auto wei_n = wei[0];
auto wei_c = wei[1];
auto wei_h = wei[2];
auto wei_w = wei[3];
par_dfor(in_n, wei_c)([&](std::size_t o, std::size_t k) {
dfor(in_c, in_h, in_w, wei_h, wei_w)(
[&](std::size_t w, std::size_t i, std::size_t j, std::size_t x, std::size_t y) {
const int start_x = i * op.stride[0] - op.padding[0];
const int start_y = j * op.stride[1] - op.padding[1];
const int out_x = start_x + x * op.dilation[0];
const int out_y = start_y + y * op.dilation[1];
const auto group_id = w / (wei_n / op.group);
const auto in_ch = group_id * wei_c + k;
if(out_x >= 0 && out_x < out_h && out_y >= 0 && out_y < out_w)
{
output(o, in_ch, out_x, out_y) +=
input(o, w, i, j) * weights(w, k, x, y);
}
});
});
});
return result;
}
};
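The CPU deconvolution scatters rather than gathers: each input element (i, j), weighted by kernel entry (x, y), is accumulated into output position (i*stride - padding + x*dilation, j*stride - padding + y*dilation), and contributions that fall outside the output are skipped. For example, with stride 2, padding 1, and dilation 1, input row i = 3 combined with kernel row x = 2 lands on output row 3*2 - 1 + 2 = 7.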
struct cpu_im2col
{
op::im2col op;
......@@ -598,9 +661,10 @@ struct cpu_softmax
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
auto batch_lens = output_shape.lens();
std::size_t n_dims = batch_lens[op.axis];
batch_lens[op.axis] = 1;
auto batch_lens = output_shape.lens();
int64_t tuned_axis = (op.axis < 0) ? op.axis + args[0].get_shape().lens().size() : op.axis;
std::size_t n_dims = batch_lens[tuned_axis];
batch_lens[tuned_axis] = 1;
shape batch_shape{shape::int32_type, batch_lens};
visit_all(result, args[0])([&](auto output, auto input) {
......@@ -612,26 +676,26 @@ struct cpu_softmax
auto idx = batch_shape.multi(i);
for(std::size_t j = 0; j < n_dims; ++j)
{
idx[op.axis] = j;
batch_max[i] = std::max(batch_max[i], input(idx.begin(), idx.end()));
idx[tuned_axis] = j;
batch_max[i] = std::max(batch_max[i], input(idx.begin(), idx.end()));
}
for(std::size_t j = 0; j < n_dims; ++j)
{
idx[op.axis] = j;
idx[tuned_axis] = j;
std::size_t index = output_shape.index(idx);
output[index] = std::exp(input[index] - batch_max[i]);
}
for(std::size_t j = 0; j < n_dims; ++j)
{
idx[op.axis] = j;
idx[tuned_axis] = j;
batch_sum[i] += output(idx.begin(), idx.end());
}
for(std::size_t j = 0; j < n_dims; ++j)
{
idx[op.axis] = j;
idx[tuned_axis] = j;
output(idx.begin(), idx.end()) =
op.output()(output(idx.begin(), idx.end()), batch_sum[i]);
}
......@@ -664,8 +728,10 @@ struct cpu_apply
apply_map["batch_norm_inference"] =
extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
apply_map["convolution"] = extend_op<cpu_convolution<op::convolution>, op::convolution>();
apply_map["dot"] = extend_op<cpu_gemm, op::dot>();
apply_map["quant_dot"] = extend_op<cpu_quant_gemm, op::quant_dot>();
apply_map["deconvolution"] =
extend_op<cpu_deconvolution<op::deconvolution>, op::deconvolution>();
apply_map["dot"] = extend_op<cpu_gemm, op::dot>();
apply_map["quant_dot"] = extend_op<cpu_quant_gemm, op::quant_dot>();
apply_map["quant_convolution"] =
extend_op<cpu_convolution<op::quant_convolution>, op::quant_convolution>();
apply_map["elu"] = extend_op<cpu_unary<elu_op>, op::elu>();
......
......@@ -12,6 +12,7 @@ endif()
add_library(migraphx_device
device/acos.cpp
device/acosh.cpp
device/add.cpp
device/add_clip.cpp
device/add_relu.cpp
......@@ -20,7 +21,9 @@ add_library(migraphx_device
device/argmax.cpp
device/argmin.cpp
device/asin.cpp
device/asinh.cpp
device/atan.cpp
device/atanh.cpp
device/ceil.cpp
device/clip.cpp
device/concat.cpp
......@@ -43,10 +46,12 @@ add_library(migraphx_device
device/mul_add_relu.cpp
device/pad.cpp
device/pow.cpp
device/prelu.cpp
device/reduce_max.cpp
device/reduce_mean.cpp
device/reduce_min.cpp
device/reduce_sum.cpp
device/reduce_prod.cpp
device/relu.cpp
device/round.cpp
device/rsqrt.cpp
......@@ -79,6 +84,7 @@ add_library(migraphx_gpu
lowering.cpp
pooling.cpp
convolution.cpp
deconvolution.cpp
quant_convolution.cpp
softmax.cpp
logsoftmax.cpp
......
......@@ -14,7 +14,9 @@ shape hip_argmax::compute_shape(const std::vector<shape>& inputs) const
argument hip_argmax::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::argmax(ctx.get_stream().get(), args.back(), args.front(), op.axis);
auto n_dim = args.front().get_shape().lens().size();
int64_t tuned_axis = (op.axis < 0) ? op.axis + n_dim : op.axis;
device::argmax(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
return args.back();
}
......
......@@ -14,7 +14,9 @@ shape hip_argmin::compute_shape(const std::vector<shape>& inputs) const
argument hip_argmin::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::argmin(ctx.get_stream().get(), args.back(), args.front(), op.axis);
auto n_dim = args.front().get_shape().lens().size();
int64_t tuned_axis = (op.axis < 0) ? op.axis + n_dim : op.axis;
device::argmin(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
return args.back();
}
......
#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_deconvolution::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(4).standard();
return op.compute_shape({inputs.at(0), inputs.at(1)});
}
argument miopen_deconvolution::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
auto x_desc = make_tensor(args[0].get_shape());
auto w_desc = make_tensor(args[1].get_shape());
auto y_desc = make_tensor(output_shape);
float alpha = 1;
float beta = 0;
auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
&alpha,
x_desc.get(),
args[0].implicit(),
w_desc.get(),
args[1].implicit(),
cd.get(),
algo,
&beta,
y_desc.get(),
args[3].implicit(),
args[2].implicit(),
args[2].get_shape().bytes());
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("Running deconvolution failed");
return args[3];
}
shape miopen_deconvolution::compile(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(inputs[0]);
auto w_desc = make_tensor(inputs[1]);
auto y_desc = make_tensor(output_shape);
std::size_t workspace_size = 0;
miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]));
auto w = to_gpu(generate_argument(inputs[1]));
auto y = allocate_gpu(output_shape);
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
miopenConvAlgoPerf_t perf;
auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("Find deconvolution failed");
handle = ctx.get_stream().get_miopen();
algo = perf.fwd_algo;
return shape{shape::int8_type, {perf.memory}};
}
void miopen_deconvolution::finalize(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
if(handle == ctx.get_stream().get_miopen())
return;
// Check that workspace hasn't changed
auto size = inputs.at(2).bytes();
auto ws = compile(ctx, output_shape, std::move(inputs));
if(ws.bytes() > size)
MIGRAPHX_THROW("Workspace has changed during finalization.");
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -9,7 +9,7 @@ namespace device {
void acos(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::acos(to_hip_type(x)); });
nary(stream, result, arg)([](auto x) __device__ { return ::acos(to_hip_type(x)); });
}
} // namespace device
......
#include <migraphx/gpu/device/acosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void acosh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::acosh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx