Unverified commit a5c1c7f6, authored by Paul Fultz II, committed by GitHub

Merge branch 'develop' into mem_color_ordering_fix

parents 462a4920 d516b099
@@ -4,7 +4,7 @@
 #include <blaze/math/CustomMatrix.h>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {
 template <class T>
@@ -94,5 +94,5 @@ void migemm(
 }
 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
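Most hunks in this merge are this same mechanical rename of the inline-namespace macro from MIGRAPH_INLINE_NS to MIGRAPHX_INLINE_NS. For readers unfamiliar with the pattern, here is a rough sketch of how such a versioning macro works; the real macro is defined via migraphx/config.hpp, and the "version_1" token below is a placeholder, not the actual value:

#define MIGRAPHX_INLINE_NS version_1 // placeholder token for illustration

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct shape;
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

// Because the namespace is inline, migraphx::shape and
// migraphx::version_1::shape name the same type: callers never spell
// the version, but mangled symbols still carry it for ABI versioning.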
-#ifndef MIGRAPH_GUARD_RTGLIB_CONTEXT_HPP
-#define MIGRAPH_GUARD_RTGLIB_CONTEXT_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
 #include <migraphx/config.hpp>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {
 struct context
@@ -13,7 +13,7 @@ struct context
 };
 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
 #endif
-#ifndef MIGRAPH_GUARD_RTGLIB_CPU_GEMM_HPP
-#define MIGRAPH_GUARD_RTGLIB_CPU_GEMM_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_GEMM_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CPU_GEMM_HPP
 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {
 void migemm(
     const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta);
 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
 #endif
-#ifndef MIGRAPH_GUARD_RTGLIB_CPU_LOWERING_HPP
-#define MIGRAPH_GUARD_RTGLIB_CPU_LOWERING_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
 #include <migraphx/program.hpp>
 #include <migraphx/config.hpp>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {
 struct lowering
@@ -15,7 +15,7 @@ struct lowering
 };
 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
 #endif
-#ifndef MIGRAPH_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
-#define MIGRAPH_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
+#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
+#define MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
 #include <migraphx/program.hpp>
 #include <migraphx/cpu/context.hpp>
 #include <migraphx/config.hpp>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {
 struct target
@@ -17,7 +17,7 @@ struct target
 };
 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
 #endif
@@ -5,12 +5,13 @@
 #include <migraphx/operators.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/iterator_for.hpp>
+#include <migraphx/par_dfor.hpp>
 #include <migraphx/cpu/gemm.hpp>
 #include <unordered_map>
 #include <utility>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {
 template <typename T>
@@ -19,6 +20,14 @@ T zero(const T&)
     return T(0);
 }
+template <class T>
+typename std::conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::enable_if<true, T>>::
+    type
+    make_signed(T x)
+{
+    return x;
+}
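This helper exists so that abs_op further down can call std::abs on unsigned inputs without ambiguity or sign-conversion warnings: integral arguments are converted to their signed counterpart, and everything else passes through unchanged. A minimal standalone sketch of the behavior:

#include <cstdint>
#include <type_traits>

// Same trick as the diff: pick std::make_signed<T> for integral T,
// otherwise fall back to an identity trait that yields T itself.
template <class T>
typename std::conditional_t<std::is_integral<T>{},
                            std::make_signed<T>,
                            std::enable_if<true, T>>::type
make_signed(T x)
{
    return x;
}

static_assert(std::is_same<decltype(make_signed(std::uint8_t{1})), std::int8_t>{}, "");
static_assert(std::is_same<decltype(make_signed(1.5f)), float>{}, "");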
 //
 // cpu implementation of batch norm for inference
 //
@@ -64,7 +73,7 @@ struct cpu_batch_norm_inference
             visit_all(output, input, mini_batch_mean, mini_batch_variance, arg_gamma, arg_bias)(
                 [&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
-                    dfor(num_batch, num_channels, image_height, image_width)(
+                    par_dfor(num_batch, num_channels, image_height, image_width)(
                         [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
                             assert((variance(c) + epsilon) > 0);
                             result(n, c, h, w) = gamma(c) * (buffer(n, c, h, w) - mean(c)) /
@@ -79,7 +88,7 @@ struct cpu_batch_norm_inference
             visit_all(output, input, mini_batch_mean, mini_batch_mean, arg_gamma, arg_bias)(
                 [&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
-                    dfor(num_batch, num_channels, image_height, image_width)(
+                    par_dfor(num_batch, num_channels, image_height, image_width)(
                         [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
                             assert((variance(c, h, w) + epsilon) > 0);
                             result(n, c, h, w) = gamma(c, h, w) *
@@ -94,6 +103,43 @@ struct cpu_batch_norm_inference
 }
 };
+struct cpu_lrn
+{
+    op::lrn op;
+    std::string name() const { return "cpu::lrn"; }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    {
+        argument result{output_shape};
+        visit_all(result, args[0])([&](auto output, auto input) {
+            int n_batch         = output_shape.lens()[0];
+            int channels        = output_shape.lens()[1];
+            int height          = output_shape.lens()[2];
+            int width           = output_shape.lens()[3];
+            float alphaoverarea = op.alpha / op.size;
+            int radius          = (op.size - 1) / 2;
+            par_dfor(n_batch, height, width)([&](int b, int h, int w) {
+                dfor(channels)([&](int c) {
+                    auto start = (c - radius) < 0 ? 0 : (c - radius);
+                    // window is inclusive of c + radius, clamped to the channel count
+                    auto end = (c + radius + 1) > channels ? channels : (c + radius + 1);
+                    // reset the accumulator for every channel
+                    float scale = 0;
+                    for(auto k = start; k < end; ++k)
+                    {
+                        scale += std::pow(input(b, k, h, w), 2);
+                    }
+                    scale *= alphaoverarea;
+                    scale += op.bias;
+                    scale              = std::pow(scale, -op.beta);
+                    output(b, c, h, w) = input(b, c, h, w) * scale;
+                });
+            });
+        });
+        return result;
+    }
+};
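The window math above implements the standard LRN formula: each output is the input scaled by (bias + alpha/size * sum of squared neighbors across channels)^(-beta). A tiny worked check on made-up values (size = 3, alpha = 1, beta = 0.5, bias = 1, three channels):

#include <cmath>
#include <cstdio>

// Hand-computed LRN for one pixel across 3 channels, window size 3
// (radius 1). All values are illustrative.
int main()
{
    const float x[3]  = {1.0f, 2.0f, 3.0f}; // input at (b, :, h, w)
    const float alpha = 1.0f, beta = 0.5f, bias = 1.0f;
    const int size = 3, radius = (size - 1) / 2, channels = 3;
    for(int c = 0; c < channels; ++c)
    {
        float sum = 0;
        int start = c - radius < 0 ? 0 : c - radius;
        int end   = c + radius + 1 > channels ? channels : c + radius + 1;
        for(int k = start; k < end; ++k)
            sum += x[k] * x[k];
        float scale = std::pow(bias + alpha / size * sum, -beta);
        std::printf("out[%d] = %f\n", c, x[c] * scale);
    }
}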
 struct cpu_convolution
 {
     op::convolution op;
@@ -104,28 +150,33 @@ struct cpu_convolution
     {
         argument result{output_shape};
         visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
-            auto in_h = input.get_shape().lens()[2];
-            auto in_w = input.get_shape().lens()[3];
-            auto wei_c = weights.get_shape().lens()[1];
-            auto wei_h = weights.get_shape().lens()[2];
-            auto wei_w = weights.get_shape().lens()[3];
+            auto in   = input.get_shape().lens();
+            auto in_h = in[2];
+            auto in_w = in[3];
+            auto wei   = weights.get_shape().lens();
+            auto wei_n = wei[0];
+            auto wei_c = wei[1];
+            auto wei_h = wei[2];
+            auto wei_w = wei[3];
-            dfor(output_shape.lens()[0],
-                 output_shape.lens()[1],
-                 output_shape.lens()[2],
-                 output_shape.lens()[3])(
+            par_dfor(output_shape.lens()[0],
+                     output_shape.lens()[1],
+                     output_shape.lens()[2],
+                     output_shape.lens()[3])(
                 [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                     const int start_x  = i * op.stride[0] - op.padding[0];
                     const int start_y  = j * op.stride[1] - op.padding[1];
+                    const int group_id = w / (wei_n / op.group);
                     double acc = 0;
                     dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
                         const int in_x  = start_x + x;
                         const int in_y  = start_y + y;
+                        const int in_ch = group_id * wei_c + k;
                         if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                         {
-                            acc += input(o, k, in_x, in_y) * weights(w, k, x, y);
+                            acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
                         }
                     });
                     output(o, w, i, j) = acc;
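The two added lines are what turn the plain convolution into a grouped one: output channel w belongs to group w / (wei_n / group), and filter channel k reads input channel group_id * wei_c + k. A quick sanity check of the indexing on hypothetical sizes (8 filters, group = 2, 2 input channels per filter):

#include <cstdio>

// Grouped-convolution channel indexing from the diff, checked on
// made-up sizes: wei_n = 8 filters, group = 2, wei_c = 2 input
// channels per filter (total input channels = group * wei_c = 4).
int main()
{
    const int wei_n = 8, group = 2, wei_c = 2;
    for(int w = 0; w < wei_n; ++w) // output channel
    {
        const int group_id = w / (wei_n / group);
        for(int k = 0; k < wei_c; ++k) // filter channel
        {
            const int in_ch = group_id * wei_c + k;
            std::printf("filter %d, tap %d -> input channel %d\n", w, k, in_ch);
        }
    }
    // Filters 0-3 read input channels 0-1; filters 4-7 read 2-3.
}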
@@ -158,7 +209,8 @@ struct cpu_im2col
         const std::size_t& stride_h = op.stride[0];
         const std::size_t& stride_w = op.stride[1];
-        int kdiv2_h, kdiv2_w;
+        int kdiv2_h;
+        int kdiv2_w;
         kdiv2_h = kernel_h / 2;
         kdiv2_w = kernel_w / 2;
         // calculate output sizes
@@ -231,10 +283,10 @@ struct cpu_pooling
             auto in_h = input.get_shape().lens()[2];
             auto in_w = input.get_shape().lens()[3];
-            dfor(output_shape.lens()[0],
-                 output_shape.lens()[1],
-                 output_shape.lens()[2],
-                 output_shape.lens()[3])(
+            par_dfor(output_shape.lens()[0],
+                     output_shape.lens()[1],
+                     output_shape.lens()[2],
+                     output_shape.lens()[3])(
                 [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                     const int start_x0 = i * op.stride[0] - op.padding[0];
                     const int start_y0 = j * op.stride[1] - op.padding[1];
@@ -271,14 +323,33 @@ struct cpu_contiguous
     std::string name() const { return "cpu::contiguous"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    {
+        return op.compute(output_shape, std::move(args));
+    }
+};
+
+struct cpu_pad
+{
+    op::pad op;
+    std::string name() const { return "cpu::pad"; }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         assert(output_shape.standard());
         argument result{output_shape};
+        result.visit([&](auto output) { std::fill(output.begin(), output.end(), op.value); });
         visit_all(result, args[0])([&](auto output, auto input) {
-            shape_for_each(output.get_shape(), [&](const auto& idx) {
-                output(idx.begin(), idx.end()) = input(idx.begin(), idx.end());
+            shape_for_each(input.get_shape(), [&](const auto& idx) {
+                std::vector<std::size_t> new_idx(idx.size());
+                std::transform(
+                    idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
+                        return i + j;
+                    });
+                output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
             });
         });
         return result;
     }
 };
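The pad kernel first fills the whole output with op.value, then copies each input element to idx + pads, where the first rank entries of op.pads are the leading pads per dimension. Padding a 2x2 tensor by one on every side therefore lands input element (0, 0) at output (1, 1). A sketch of the index shift in isolation (pads assumed to be ordered ONNX-style, begins then ends):

#include <algorithm>
#include <cstdio>
#include <vector>

// Index arithmetic from cpu_pad, isolated: shift every input index by
// the leading pad of its dimension.
int main()
{
    std::vector<std::size_t> idx  = {0, 0};       // input element (0, 0)
    std::vector<std::size_t> pads = {1, 1, 1, 1}; // pad 2x2 -> 4x4
    std::vector<std::size_t> new_idx(idx.size());
    std::transform(idx.begin(), idx.end(), pads.begin(), new_idx.begin(),
                   [](auto i, auto j) { return i + j; });
    std::printf("(%zu, %zu) -> (%zu, %zu)\n", idx[0], idx[1], new_idx[0], new_idx[1]);
    // prints (0, 0) -> (1, 1)
}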
@@ -290,24 +361,7 @@ struct cpu_concat
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
-        argument result{output_shape};
-        std::vector<std::size_t> coffsets = op.compute_offsets(output_shape, args);
-        for(std::size_t l = 0; l < args.size(); l++)
-        {
-            auto argl             = args[l];
-            std::size_t nelements = argl.get_shape().elements();
-            visit_all(result, argl)([&](auto output, auto input) {
-                auto slice_shape =
-                    shape{output_shape.type(), input.get_shape().lens(), output_shape.strides()};
-                auto slice = make_view(slice_shape, output.data() + coffsets[l]);
-                // cppcheck-suppress useStlAlgorithm
-                for(std::size_t i = 0; i < nelements; i++)
-                {
-                    slice[i] = input[i];
-                }
-            });
-        }
-        return result;
+        return op.compute(output_shape, std::move(args));
     }
 };
@@ -325,6 +379,18 @@ struct cpu_gemm
     }
 };
+
+struct cpu_gather
+{
+    op::gather op;
+    std::string name() const { return "cpu::gather"; }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    {
+        return op.compute(output_shape, std::move(args));
+    }
+};
 struct identity_op
 {
     std::string name() const { return "cpu::identity"; }
@@ -339,7 +405,7 @@ struct abs_op
     std::string name() const { return "cpu::abs"; }
     auto fcn() const
     {
-        return [](auto x) { return std::abs(x); };
+        return [](auto x) { return std::abs(make_signed(x)); };
     }
 };
@@ -352,6 +418,15 @@ struct exp_op
     }
 };
+
+struct log_op
+{
+    std::string name() const { return "cpu::log"; }
+    auto fcn() const
+    {
+        return [](auto x) { return std::log(x); };
+    }
+};
 struct sin_op
 {
     std::string name() const { return "cpu::sin"; }
@@ -406,6 +481,24 @@ struct atan_op
     }
 };
+
+struct sinh_op
+{
+    std::string name() const { return "cpu::sinh"; }
+    auto fcn() const
+    {
+        return [](auto x) { return std::sinh(x); };
+    }
+};
+
+struct cosh_op
+{
+    std::string name() const { return "cpu::cosh"; }
+    auto fcn() const
+    {
+        return [](auto x) { return std::cosh(x); };
+    }
+};
 struct tanh_op
 {
     std::string name() const { return "cpu::tanh"; }
@@ -453,6 +546,17 @@ struct leaky_relu_op
     }
 };
+
+struct elu_op
+{
+    op::elu op;
+    std::string name() const { return "cpu::elu"; }
+    auto fcn() const
+    {
+        auto& a = op.alpha;
+        return [a](auto x) { return x > 0 ? x : a * std::expm1(x); };
+    }
+};
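Using std::expm1(x) rather than std::exp(x) - 1 keeps ELU's negative branch accurate near zero, where exp(x) - 1 loses almost all precision to cancellation. A quick illustration with an arbitrary tiny input:

#include <cmath>
#include <cstdio>

// ELU's negative branch computed two ways; expm1 avoids the
// catastrophic cancellation in exp(x) - 1 for tiny |x|.
int main()
{
    double x = -1e-12, alpha = 1.0;
    std::printf("alpha * (exp(x) - 1) = %.17g\n", alpha * (std::exp(x) - 1.0));
    std::printf("alpha * expm1(x)     = %.17g\n", alpha * std::expm1(x));
    // expm1 returns ~-1e-12 to full precision; exp(x) - 1 is noticeably off.
}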
 template <typename Op>
 struct cpu_unary
 {
@@ -545,6 +649,24 @@ struct div_op
     }
 };
+
+struct max_op
+{
+    std::string name() const { return "max"; }
+    auto fcn() const
+    {
+        return [](auto x, auto y) { return std::max(x, y); };
+    }
+};
+
+struct min_op
+{
+    std::string name() const { return "min"; }
+    auto fcn() const
+    {
+        return [](auto x, auto y) { return std::min(x, y); };
+    }
+};
 template <typename Op>
 struct cpu_binary
 {
@@ -596,22 +718,35 @@ struct cpu_apply
         apply_map["dot"] = extend_op<cpu_gemm, op::dot>();
         apply_map["batch_norm_inference"] =
             extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
+        apply_map["lrn"]        = extend_op<cpu_lrn, op::lrn>();
         apply_map["contiguous"] = extend_op<cpu_contiguous, op::contiguous>();
+        apply_map["pad"]        = extend_op<cpu_pad, op::pad>();
         apply_map["concat"]     = extend_op<cpu_concat, op::concat>();
+        apply_map["gather"]     = extend_op<cpu_gather, op::gather>();
         apply_map["leaky_relu"] = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
+        apply_map["elu"]        = extend_op<cpu_unary<elu_op>, op::elu>();
         apply_map["identity"]   = simple_op<cpu_unary<identity_op>>();
+        apply_map["abs"]        = simple_op<cpu_unary<abs_op>>();
+        apply_map["sinh"]       = simple_op<cpu_unary<sinh_op>>();
+        apply_map["cosh"]       = simple_op<cpu_unary<cosh_op>>();
         apply_map["tanh"]       = simple_op<cpu_unary<tanh_op>>();
         apply_map["sigmoid"]    = simple_op<cpu_unary<sigmoid_op>>();
         apply_map["exp"]        = simple_op<cpu_unary<exp_op>>();
+        apply_map["log"]        = simple_op<cpu_unary<log_op>>();
         apply_map["neg"]        = simple_op<cpu_unary<neg_op>>();
         apply_map["sin"]        = simple_op<cpu_unary<sin_op>>();
         apply_map["cos"]        = simple_op<cpu_unary<cos_op>>();
         apply_map["tan"]        = simple_op<cpu_unary<tan_op>>();
+        apply_map["asin"]       = simple_op<cpu_unary<asin_op>>();
+        apply_map["acos"]       = simple_op<cpu_unary<acos_op>>();
+        apply_map["atan"]       = simple_op<cpu_unary<atan_op>>();
         apply_map["relu"]       = simple_op<cpu_unary<relu_op>>();
         apply_map["add"]        = simple_op<cpu_binary<add_op>>();
         apply_map["sub"]        = simple_op<cpu_binary<sub_op>>();
        apply_map["mul"]        = simple_op<cpu_binary<mul_op>>();
         apply_map["div"]        = simple_op<cpu_binary<div_op>>();
+        apply_map["max"]        = simple_op<cpu_binary<max_op>>();
+        apply_map["min"]        = simple_op<cpu_binary<min_op>>();
         apply_map["softmax"]    = simple_op<softmax2d>();
     }
@@ -658,5 +793,5 @@ struct cpu_apply
 void lowering::apply(program& p) const { cpu_apply{&p}.apply(); }
 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
@@ -2,18 +2,24 @@
 #include <migraphx/cpu/target.hpp>
 #include <migraphx/cpu/lowering.hpp>
 #include <migraphx/auto_contiguous.hpp>
+#include <migraphx/rewrite_rnn.hpp>
+#include <migraphx/dead_code_elimination.hpp>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {
 std::string target::name() const { return "cpu"; }
 std::vector<pass> target::get_passes(migraphx::context&) const
 {
-    return {auto_contiguous{}, lowering{}};
+    return {auto_contiguous{},
+            rewrite_rnn{},
+            dead_code_elimination{},
+            lowering{},
+            dead_code_elimination{}};
 }
 } // namespace cpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
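The passes returned by get_passes run in order: rewrite_rnn expands RNN operators into primitive ops before lowering maps everything onto cpu:: implementations, and dead_code_elimination runs after each rewriting pass to drop instructions the rewrite orphaned. A minimal sketch of how a sequential pipeline like this is typically driven (the pass and program types here are simplified stand-ins, not the MIGraphX API):

#include <functional>
#include <string>
#include <vector>

// Illustrative stand-ins; the real types live in migraphx/pass.hpp
// and migraphx/program.hpp.
struct program { /* instruction stream elided */ };
struct pass
{
    std::string name;
    std::function<void(program&)> apply;
};

// Passes are applied strictly in the order the target returns them,
// which is why dead_code_elimination appears twice above.
void run_passes(program& p, const std::vector<pass>& passes)
{
    for(const auto& ps : passes)
        ps.apply(p);
}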
@@ -12,15 +12,29 @@ endif()
 add_library(migraphx_device
     device/add.cpp
+    device/max.cpp
+    device/min.cpp
+    device/exp.cpp
+    device/log.cpp
     device/sin.cpp
+    device/cos.cpp
+    device/tan.cpp
+    device/sinh.cpp
+    device/cosh.cpp
+    device/asin.cpp
+    device/acos.cpp
+    device/atan.cpp
     device/add_relu.cpp
     device/contiguous.cpp
     device/mul.cpp
     device/concat.cpp
+    device/pad.cpp
+    device/gather.cpp
+    device/sub.cpp
 )
 set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
 rocm_clang_tidy_check(migraphx_device)
-target_link_libraries(migraphx_device migraphx hip::device -Wno-invalid-command-line-argument -amdgpu-target=gfx803 -amdgpu-target=gfx900 -amdgpu-target=gfx903)
+target_link_libraries(migraphx_device migraphx hip::device -Wno-invalid-command-line-argument -amdgpu-target=gfx803 -amdgpu-target=gfx900 -amdgpu-target=gfx906)
 target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
 target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
@@ -38,12 +52,16 @@ add_library(migraphx_gpu
     concat.cpp
     relu.cpp
     leaky_relu.cpp
-    add.cpp
-    sin.cpp
-    mul.cpp
+    tanh.cpp
     batchnorm.cpp
     write_literals.cpp
     rocblas.cpp
+    sigmoid.cpp
+    abs.cpp
+    elu.cpp
+    pad.cpp
+    gather.cpp
+    lrn.cpp
 )
 set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
 rocm_clang_tidy_check(migraphx_gpu)
#include <migraphx/gpu/abs.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_abs::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_abs::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
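The ad used in miopenActivationForward above is the miopen_abs member holding the activation descriptor, set up elsewhere in the class. As a hedged sketch of how such a descriptor is created with MIOpen's C API (error handling trimmed; MIGraphX wraps the raw handle in a managed pointer rather than using it like this):

#include <miopen/miopen.h>

// Rough sketch: build an activation descriptor for the ABS mode.
miopenActivationDescriptor_t make_abs_descriptor()
{
    miopenActivationDescriptor_t desc;
    miopenCreateActivationDescriptor(&desc);
    // alpha/beta/gamma are unused by the ABS mode; zeros are conventional.
    miopenSetActivationDescriptor(desc, miopenActivationABS, 0.0, 0.0, 0.0);
    return desc;
}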
#include <migraphx/gpu/add.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_add::compute_shape(const std::vector<shape>& inputs) const
{
// check_shapes{inputs, *this}.has(3).standard();
check_shapes{inputs, *this}.has(3);
return inputs.at(0);
}
argument hip_add::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::add(ctx.get_stream().get(), args[2], args[0], args[1]);
return args[2];
}
shape miopen_add::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(3).not_broadcasted();
return inputs.at(0);
}
argument miopen_add::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1, beta = 0;
auto a_desc = make_tensor(args[0].get_shape());
auto b_desc = make_tensor(args[1].get_shape());
auto c_desc = make_tensor(output_shape);
miopenOpTensor(ctx.get_stream().get_miopen(),
miopenTensorOpAdd,
&alpha,
a_desc.get(),
args[0].implicit(),
&alpha,
b_desc.get(),
args[1].implicit(),
&beta,
c_desc.get(),
args[2].implicit());
return args[2];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -5,7 +5,7 @@
 #include <utility>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 shape miopen_batch_norm_inference::compute_shape(const std::vector<shape>& inputs) const
@@ -22,7 +22,8 @@ argument miopen_batch_norm_inference::compute(context& ctx,
     auto y_desc  = make_tensor(output_shape);
     auto bn_desc = make_tensor(args[3].get_shape());
-    float alpha = 1.0, beta = 0.0f;
+    float alpha = 1.0;
+    float beta  = 0.0f;
     miopenBatchNormalizationForwardInference(ctx.get_stream().get_miopen(),
                                              miopenBatchNormMode_t(op.bn_mode),
@@ -43,5 +44,5 @@ argument miopen_batch_norm_inference::compute(context& ctx,
 }
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
@@ -6,7 +6,7 @@
 #include <utility>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 shape hip_concat::compute_shape(std::vector<shape> inputs) const
@@ -24,5 +24,5 @@ argument hip_concat::compute(context& ctx,
 }
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
@@ -5,7 +5,7 @@
 #include <utility>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 shape miopen_contiguous::compute_shape(const std::vector<shape>& inputs) const
@@ -25,5 +25,5 @@ argument miopen_contiguous::compute(context& ctx,
 }
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
@@ -5,7 +5,7 @@
 #include <utility>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 shape miopen_convolution::compute_shape(const std::vector<shape>& inputs) const
@@ -21,7 +21,8 @@ argument miopen_convolution::compute(context& ctx,
     auto w_desc = make_tensor(args[1].get_shape());
     auto y_desc = make_tensor(output_shape);
-    float alpha = 1, beta = 0;
+    float alpha = 1;
+    float beta  = 0;
     miopenConvolutionForward(ctx.get_stream().get_miopen(),
                              &alpha,
                              x_desc.get(),
@@ -40,11 +41,11 @@ argument miopen_convolution::compute(context& ctx,
 shape miopen_convolution::compile(context& ctx,
                                   const shape& output_shape,
-                                  std::vector<instruction_ref> inputs)
+                                  std::vector<shape> inputs)
 {
     shape workspace_shape{};
-    auto x_desc = make_tensor(inputs[0]->get_shape());
-    auto w_desc = make_tensor(inputs[1]->get_shape());
+    auto x_desc = make_tensor(inputs[0]);
+    auto w_desc = make_tensor(inputs[1]);
     auto y_desc = make_tensor(output_shape);
     std::size_t workspace_size = 0;
@@ -56,31 +57,44 @@ shape miopen_convolution::compile(context& ctx,
                                                      &workspace_size);
     workspace_shape = shape{shape::int8_type, {workspace_size}};
-    auto x         = to_gpu(generate_argument(inputs[0]->get_shape()));
-    auto w         = to_gpu(generate_argument(inputs[1]->get_shape()));
+    auto x         = to_gpu(generate_argument(inputs[0]));
+    auto w         = to_gpu(generate_argument(inputs[1]));
     auto y         = allocate_gpu(output_shape);
     auto workspace = allocate_gpu(workspace_shape);
     int algo_count = 1;
     miopenConvAlgoPerf_t perf;
-    miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
-                                          x_desc.get(),
-                                          x.implicit(),
-                                          w_desc.get(),
-                                          w.implicit(),
-                                          cd.get(),
-                                          y_desc.get(),
-                                          y.implicit(),
-                                          1,
-                                          &algo_count,
-                                          &perf,
-                                          workspace.implicit(),
-                                          workspace_size,
-                                          false);
-    algo = perf.fwd_algo;
+    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
+                                                        x_desc.get(),
+                                                        x.implicit(),
+                                                        w_desc.get(),
+                                                        w.implicit(),
+                                                        cd.get(),
+                                                        y_desc.get(),
+                                                        y.implicit(),
+                                                        1,
+                                                        &algo_count,
+                                                        &perf,
+                                                        workspace.implicit(),
+                                                        workspace_size,
+                                                        false);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("Find convolution failed");
+    handle = ctx.get_stream().get_miopen();
+    algo   = perf.fwd_algo;
     return shape{shape::int8_type, {perf.memory}};
 }
+
+void miopen_convolution::finalize(context& ctx,
+                                  const shape& output_shape,
+                                  std::vector<shape> inputs)
+{
+    if(handle == ctx.get_stream().get_miopen())
+        return;
+    // TODO: Check that workspace hasn't changed
+    compile(ctx, output_shape, std::move(inputs));
+}
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
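The new finalize hook re-runs algorithm selection only when the MIOpen handle differs from the one cached during compile, i.e. when the op executes against a different stream or context than it was tuned on; otherwise the cached algo is reused. A sketch of this memoize-by-handle pattern in isolation (types simplified, not the MIGraphX API):

// Illustrative stand-ins; the real code caches miopenHandle_t and a
// MIOpen algorithm enum.
using handle_t = void*;

struct conv_state
{
    handle_t handle = nullptr; // handle the tuning ran against
    int algo        = -1;      // cached algorithm choice

    int tune(handle_t h)
    {
        handle = h;
        algo   = /* expensive search, elided */ 0;
        return algo;
    }

    // Re-tune only if the execution handle differs from the cached one.
    int get(handle_t h)
    {
        if(h != handle)
            tune(h);
        return algo;
    }
};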
#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void acos(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::acos(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
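All of the new device files (acos, asin, atan, and the rest) follow this same shape: nary(stream, result, args...) builds an elementwise HIP kernel from the lambda, and to_hip_type converts storage types such as half into something the ::acos overload accepts. A hedged, host-side sketch of what such an elementwise launcher does conceptually (plain C++ rather than HIP, purely to show the contract):

#include <cmath>
#include <cstddef>

// Conceptual model of nary(...) for one input: apply f elementwise.
// The real implementation launches a HIP kernel over device buffers.
template <class F>
void nary_like(const float* in, float* out, std::size_t n, F f)
{
    for(std::size_t i = 0; i < n; ++i)
        out[i] = f(in[i]);
}

// Usage mirroring the acos device op:
// nary_like(x, y, n, [](float v) { return std::acos(v); });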
@@ -2,7 +2,7 @@
 #include <migraphx/gpu/device/nary.hpp>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
@@ -22,5 +22,5 @@ void add(hipStream_t stream,
 } // namespace device
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
@@ -2,7 +2,7 @@
 #include <migraphx/gpu/device/nary.hpp>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
@@ -27,5 +27,5 @@ void add_relu(hipStream_t stream,
 } // namespace device
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
#include <migraphx/gpu/device/asin.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void asin(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::asin(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/atan.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void atan(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::atan(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -5,7 +5,7 @@
 #include <migraphx/gpu/device/launch.hpp>
 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
@@ -34,5 +34,5 @@ argument concat(hipStream_t stream,
 } // namespace device
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx