Commit bc5d7f75 authored by Paul

Merge from develop

parents 47c0854d a5b0afa0
#ifndef MIGRAPH_GUARD_RTGLIB_CPU_LOWERING_HPP
#define MIGRAPH_GUARD_RTGLIB_CPU_LOWERING_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#include <migraph/program.hpp>
#include <migraph/config.hpp>
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
struct lowering
@@ -15,7 +15,7 @@ struct lowering
};
} // namespace cpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
#include <migraphx/program.hpp>
#include <migraphx/cpu/context.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
struct target
{
std::string name() const;
std::vector<pass> get_passes(migraphx::context& ctx) const;
migraphx::context get_context() const { return context{}; }
};
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#include <migraph/cpu/lowering.hpp>
#include <migraph/instruction.hpp>
#include <migraph/dfor.hpp>
#include <migraph/operators.hpp>
#include <migraph/shape_for_each.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/cpu/gemm.hpp>
#include <migraphx/cpu/lowering.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/par_dfor.hpp>
#include <migraphx/cpu/gemm.hpp>
#include <unordered_map>
#include <utility>
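// Note: par_dfor (par_dfor.hpp) is the parallel counterpart of dfor; the
// element-wise loops below use it to split the output index space across CPU threads.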
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
template <typename T>
@@ -19,6 +20,14 @@ T zero(const T&)
return T(0);
}
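// Maps unsigned integral types to their signed counterparts (and leaves all
// other types unchanged) so that std::abs below is well-defined for unsigned inputs.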
template <class T>
typename std::conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::enable_if<true, T>>::
type
make_signed(T x)
{
return x;
}
//
// cpu implementation of batch norm for inference
//
@@ -64,7 +73,7 @@ struct cpu_batch_norm_inference
visit_all(output, input, mini_batch_mean, mini_batch_variance, arg_gamma, arg_bias)(
[&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
dfor(num_batch, num_channels, image_height, image_width)(
par_dfor(num_batch, num_channels, image_height, image_width)(
[&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
assert((variance(c) + epsilon) > 0);
result(n, c, h, w) = gamma(c) * (buffer(n, c, h, w) - mean(c)) /
@@ -79,7 +88,7 @@ struct cpu_batch_norm_inference
visit_all(output, input, mini_batch_mean, mini_batch_mean, arg_gamma, arg_bias)(
[&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
dfor(num_batch, num_channels, image_height, image_width)(
par_dfor(num_batch, num_channels, image_height, image_width)(
[&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
assert((variance(c, h, w) + epsilon) > 0);
result(n, c, h, w) = gamma(c, h, w) *
@@ -94,6 +103,43 @@ struct cpu_batch_norm_inference
}
};
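// Cross-channel local response normalization:
// out(b,c,h,w) = in(b,c,h,w) * (bias + (alpha/size) * sum_k in(b,k,h,w)^2)^(-beta),
// where the sum runs over a window of neighboring channels around c.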
struct cpu_lrn
{
op::lrn op;
std::string name() const { return "cpu::lrn"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
int n_batch = output_shape.lens()[0];
int channels = output_shape.lens()[1];
int height = output_shape.lens()[2];
int width = output_shape.lens()[3];
float alphaoverarea = op.alpha / op.size;
int radius = (op.size - 1) / 2;
par_dfor(n_batch, height, width)([&](int b, int h, int w) {
dfor(channels)([&](int c) {
float scale = 0; // reset the accumulator for each output channel
auto start = (c - radius) < 0 ? 0 : (c - radius);
auto end = (c + radius) > channels ? channels : (c + radius);
for(auto k = start; k < end; ++k)
{
scale += std::pow(input(b, k, h, w), 2);
}
scale *= alphaoverarea;
scale += op.bias;
scale = std::pow(scale, -op.beta);
output(b, c, h, w) = input(b, c, h, w) * scale;
});
});
});
return result;
}
};
struct cpu_convolution
{
op::convolution op;
@@ -104,28 +150,33 @@
{
argument result{output_shape};
visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
auto in_h = input.get_shape().lens()[2];
auto in_w = input.get_shape().lens()[3];
auto wei_c = weights.get_shape().lens()[1];
auto wei_h = weights.get_shape().lens()[2];
auto wei_w = weights.get_shape().lens()[3];
dfor(output_shape.lens()[0],
output_shape.lens()[1],
output_shape.lens()[2],
output_shape.lens()[3])(
auto in = input.get_shape().lens();
auto in_h = in[2];
auto in_w = in[3];
auto wei = weights.get_shape().lens();
auto wei_n = wei[0];
auto wei_c = wei[1];
auto wei_h = wei[2];
auto wei_w = wei[3];
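// Grouped convolution: output channel w belongs to group w / (wei_n / op.group)
// and reads only that group's wei_c input channels (in_ch = group_id * wei_c + k).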
par_dfor(output_shape.lens()[0],
output_shape.lens()[1],
output_shape.lens()[2],
output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
const int start_x = i * op.stride[0] - op.padding[0];
const int start_y = j * op.stride[1] - op.padding[1];
const int start_x = i * op.stride[0] - op.padding[0];
const int start_y = j * op.stride[1] - op.padding[1];
const int group_id = w / (wei_n / op.group);
double acc = 0;
dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
const int in_x = start_x + x;
const int in_y = start_y + y;
const int in_x = start_x + x;
const int in_y = start_y + y;
const int in_ch = group_id * wei_c + k;
if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
{
acc += input(o, k, in_x, in_y) * weights(w, k, x, y);
acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
}
});
output(o, w, i, j) = acc;
@@ -158,7 +209,8 @@ struct cpu_im2col
const std::size_t& stride_h = op.stride[0];
const std::size_t& stride_w = op.stride[1];
int kdiv2_h, kdiv2_w;
int kdiv2_h;
int kdiv2_w;
kdiv2_h = kernel_h / 2;
kdiv2_w = kernel_w / 2;
// calculate output sizes
@@ -231,10 +283,10 @@ struct cpu_pooling
auto in_h = input.get_shape().lens()[2];
auto in_w = input.get_shape().lens()[3];
dfor(output_shape.lens()[0],
output_shape.lens()[1],
output_shape.lens()[2],
output_shape.lens()[3])(
par_dfor(output_shape.lens()[0],
output_shape.lens()[1],
output_shape.lens()[2],
output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
const int start_x0 = i * op.stride[0] - op.padding[0];
const int start_y0 = j * op.stride[1] - op.padding[1];
@@ -271,14 +323,33 @@ struct cpu_contiguous
std::string name() const { return "cpu::contiguous"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
return op.compute(output_shape, std::move(args));
}
};
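// Pad: fill the output with op.value, then copy each input element to the index
// shifted by the per-dimension leading pads (the first entries of op.pads).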
struct cpu_pad
{
op::pad op;
std::string name() const { return "cpu::contiguous"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
assert(output_shape.standard());
argument result{output_shape};
result.visit([&](auto output) { std::fill(output.begin(), output.end(), op.value); });
visit_all(result, args[0])([&](auto output, auto input) {
shape_for_each(output.get_shape(), [&](const auto& idx) {
output(idx.begin(), idx.end()) = input(idx.begin(), idx.end());
shape_for_each(input.get_shape(), [&](const auto& idx) {
std::vector<std::size_t> new_idx(idx.size());
std::transform(
idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
return i + j;
});
output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
});
});
return result;
}
};
@@ -290,24 +361,7 @@ struct cpu_concat
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
std::vector<std::size_t> coffsets = op.compute_offsets(output_shape, args);
for(std::size_t l = 0; l < args.size(); l++)
{
auto argl = args[l];
std::size_t nelements = argl.get_shape().elements();
visit_all(result, argl)([&](auto output, auto input) {
auto slice_shape =
shape{output_shape.type(), input.get_shape().lens(), output_shape.strides()};
auto slice = make_view(slice_shape, output.data() + coffsets[l]);
// cppcheck-suppress useStlAlgorithm
for(std::size_t i = 0; i < nelements; i++)
{
slice[i] = input[i];
}
});
}
return result;
return op.compute(output_shape, std::move(args));
}
};
@@ -325,6 +379,18 @@ struct cpu_gemm
}
};
struct cpu_gather
{
op::gather op;
std::string name() const { return "cpu::gather"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
return op.compute(output_shape, std::move(args));
}
};
struct identity_op
{
std::string name() const { return "cpu::identity"; }
@@ -339,7 +405,7 @@ struct abs_op
std::string name() const { return "cpu::abs"; }
auto fcn() const
{
return [](auto x) { return std::abs(x); };
return [](auto x) { return std::abs(make_signed(x)); };
}
};
@@ -352,6 +418,15 @@ struct exp_op
}
};
struct log_op
{
std::string name() const { return "cpu::log"; }
auto fcn() const
{
return [](auto x) { return std::log(x); };
}
};
struct sin_op
{
std::string name() const { return "cpu::sin"; }
@@ -406,6 +481,24 @@ struct atan_op
}
};
struct sinh_op
{
std::string name() const { return "cpu::sinh"; }
auto fcn() const
{
return [](auto x) { return std::sinh(x); };
}
};
struct cosh_op
{
std::string name() const { return "cpu::cosh"; }
auto fcn() const
{
return [](auto x) { return std::cosh(x); };
}
};
struct tanh_op
{
std::string name() const { return "cpu::tanh"; }
@@ -453,6 +546,17 @@ struct leaky_relu_op
}
};
struct elu_op
{
op::elu op;
std::string name() const { return "cpu::elu"; }
auto fcn() const
{
auto& a = op.alpha;
return [a](auto x) { return x > 0 ? x : a * std::expm1(x); };
}
};
template <typename Op>
struct cpu_unary
{
@@ -545,6 +649,24 @@ struct div_op
}
};
struct max_op
{
std::string name() const { return "max"; }
auto fcn() const
{
return [](auto x, auto y) { return std::max(x, y); };
}
};
struct min_op
{
std::string name() const { return "min"; }
auto fcn() const
{
return [](auto x, auto y) { return std::min(x, y); };
}
};
template <typename Op>
struct cpu_binary
{
@@ -596,22 +718,35 @@ struct cpu_apply
apply_map["dot"] = extend_op<cpu_gemm, op::dot>();
apply_map["batch_norm_inference"] =
extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
apply_map["lrn"] = extend_op<cpu_lrn, op::lrn>();
apply_map["contiguous"] = extend_op<cpu_contiguous, op::contiguous>();
apply_map["pad"] = extend_op<cpu_pad, op::pad>();
apply_map["concat"] = extend_op<cpu_concat, op::concat>();
apply_map["gather"] = extend_op<cpu_gather, op::gather>();
apply_map["leaky_relu"] = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
apply_map["elu"] = extend_op<cpu_unary<elu_op>, op::elu>();
apply_map["identity"] = simple_op<cpu_unary<identity_op>>();
apply_map["abs"] = simple_op<cpu_unary<abs_op>>();
apply_map["sinh"] = simple_op<cpu_unary<sinh_op>>();
apply_map["cosh"] = simple_op<cpu_unary<cosh_op>>();
apply_map["tanh"] = simple_op<cpu_unary<tanh_op>>();
apply_map["sigmoid"] = simple_op<cpu_unary<sigmoid_op>>();
apply_map["exp"] = simple_op<cpu_unary<exp_op>>();
apply_map["log"] = simple_op<cpu_unary<log_op>>();
apply_map["neg"] = simple_op<cpu_unary<neg_op>>();
apply_map["sin"] = simple_op<cpu_unary<sin_op>>();
apply_map["cos"] = simple_op<cpu_unary<cos_op>>();
apply_map["tan"] = simple_op<cpu_unary<tan_op>>();
apply_map["asin"] = simple_op<cpu_unary<asin_op>>();
apply_map["acos"] = simple_op<cpu_unary<acos_op>>();
apply_map["atan"] = simple_op<cpu_unary<atan_op>>();
apply_map["relu"] = simple_op<cpu_unary<relu_op>>();
apply_map["add"] = simple_op<cpu_binary<add_op>>();
apply_map["sub"] = simple_op<cpu_binary<sub_op>>();
apply_map["mul"] = simple_op<cpu_binary<mul_op>>();
apply_map["div"] = simple_op<cpu_binary<div_op>>();
apply_map["max"] = simple_op<cpu_binary<max_op>>();
apply_map["min"] = simple_op<cpu_binary<min_op>>();
apply_map["softmax"] = simple_op<softmax2d>();
}
@@ -658,5 +793,5 @@ struct cpu_apply
void lowering::apply(program& p) const { cpu_apply{&p}.apply(); }
} // namespace cpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/cpu/target.hpp>
#include <migraph/cpu/lowering.hpp>
#include <migraph/auto_contiguous.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/cpu/lowering.hpp>
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/dead_code_elimination.hpp>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
std::string target::name() const { return "cpu"; }
std::vector<pass> target::get_passes(migraph::context&) const
std::vector<pass> target::get_passes(migraphx::context&) const
{
return {auto_contiguous{}, lowering{}};
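// rewrite_rnn expands recurrent operators into primitive ops that lowering can
// handle; dead_code_elimination then removes instructions left unused by each pass.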
return {auto_contiguous{},
rewrite_rnn{},
dead_code_elimination{},
lowering{},
dead_code_elimination{}};
}
} // namespace cpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -10,20 +10,35 @@ if(NOT TARGET MIOpen)
message(SEND_ERROR "Can't find MIOpen")
endif()
add_library(migraph_device
add_library(migraphx_device
device/add.cpp
device/max.cpp
device/min.cpp
device/exp.cpp
device/log.cpp
device/sin.cpp
device/cos.cpp
device/tan.cpp
device/sinh.cpp
device/cosh.cpp
device/asin.cpp
device/acos.cpp
device/atan.cpp
device/add_relu.cpp
device/contiguous.cpp
device/mul.cpp
device/concat.cpp
device/pad.cpp
device/gather.cpp
device/sub.cpp
)
set_target_properties(migraph_device PROPERTIES EXPORT_NAME device)
rocm_clang_tidy_check(migraph_device)
target_link_libraries(migraph_device migraph hip::device)
target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraph_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_clang_tidy_check(migraphx_device)
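# Device code is built for the gfx803, gfx900 and gfx906 GPU targets.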
target_link_libraries(migraphx_device migraphx hip::device -Wno-invalid-command-line-argument -amdgpu-target=gfx803 -amdgpu-target=gfx900 -amdgpu-target=gfx906)
target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
add_library(migraph_gpu
add_library(migraphx_gpu
eliminate_workspace.cpp
fuse_ops.cpp
hip.cpp
@@ -37,19 +52,24 @@ add_library(migraph_gpu
concat.cpp
relu.cpp
leaky_relu.cpp
add.cpp
mul.cpp
tanh.cpp
batchnorm.cpp
write_literals.cpp
rocblas.cpp
sigmoid.cpp
abs.cpp
elu.cpp
pad.cpp
gather.cpp
lrn.cpp
)
set_target_properties(migraph_gpu PROPERTIES EXPORT_NAME gpu)
rocm_clang_tidy_check(migraph_gpu)
target_link_libraries(migraph_gpu PUBLIC migraph MIOpen roc::rocblas)
target_link_libraries(migraph_gpu PRIVATE migraph_device)
set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
rocm_clang_tidy_check(migraphx_gpu)
target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
target_link_libraries(migraphx_gpu PRIVATE migraphx_device)
rocm_install_targets(
TARGETS migraph_gpu migraph_device
TARGETS migraphx_gpu migraphx_device
INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/include
)
#include <migraphx/gpu/abs.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_abs::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_abs::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
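// ad is presumed to be the op's MIOpen activation descriptor, configured for the
// abs activation mode elsewhere in the class (not shown in this diff).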
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/add.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/config.hpp>
#include <migraph/gpu/miopen.hpp>
#include <utility>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace gpu {
shape hip_add::compute_shape(const std::vector<shape>& inputs) const
{
// check_shapes{inputs, *this}.has(3).standard();
check_shapes{inputs, *this}.has(3);
return inputs.at(0);
}
argument hip_add::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
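// args[2] is the preallocated output buffer; the device kernel writes the
// elementwise sum of args[0] and args[1] into it.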
device::add(ctx.get_stream().get(), args[2], args[0], args[1]);
return args[2];
}
shape miopen_add::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(3).not_broadcasted();
return inputs.at(0);
}
argument miopen_add::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1, beta = 0;
auto a_desc = make_tensor(args[0].get_shape());
auto b_desc = make_tensor(args[1].get_shape());
auto c_desc = make_tensor(output_shape);
miopenOpTensor(ctx.get_stream().get_miopen(),
miopenTensorOpAdd,
&alpha,
a_desc.get(),
args[0].implicit(),
&alpha,
b_desc.get(),
args[1].implicit(),
&beta,
c_desc.get(),
args[2].implicit());
return args[2];
}
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
#include <migraph/gpu/batchnorm.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraphx/gpu/batchnorm.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_batch_norm_inference::compute_shape(const std::vector<shape>& inputs) const
@@ -22,7 +22,8 @@ argument miopen_batch_norm_inference::compute(context& ctx,
auto y_desc = make_tensor(output_shape);
auto bn_desc = make_tensor(args[3].get_shape());
float alpha = 1.0, beta = 0.0f;
float alpha = 1.0;
float beta = 0.0f;
miopenBatchNormalizationForwardInference(ctx.get_stream().get_miopen(),
miopenBatchNormMode_t(op.bn_mode),
@@ -43,5 +44,5 @@ argument miopen_batch_norm_inference::compute(context& ctx,
}
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/concat.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/device/concat.hpp>
#include <migraphx/gpu/concat.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/device/concat.hpp>
#include <utility>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_concat::compute_shape(std::vector<shape> inputs) const
@@ -24,5 +24,5 @@ argument hip_concat::compute(context& ctx,
}
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/contiguous.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraphx/gpu/contiguous.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_contiguous::compute_shape(const std::vector<shape>& inputs) const
@@ -25,5 +25,5 @@ argument miopen_contiguous::compute(context& ctx,
}
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/convolution.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_convolution::compute_shape(const std::vector<shape>& inputs) const
@@ -21,7 +21,8 @@ argument miopen_convolution::compute(context& ctx,
auto w_desc = make_tensor(args[1].get_shape());
auto y_desc = make_tensor(output_shape);
float alpha = 1, beta = 0;
float alpha = 1;
float beta = 0;
miopenConvolutionForward(ctx.get_stream().get_miopen(),
&alpha,
x_desc.get(),
@@ -40,11 +41,11 @@ argument miopen_convolution::compute(context& ctx,
shape miopen_convolution::compile(context& ctx,
const shape& output_shape,
std::vector<instruction_ref> inputs)
std::vector<shape> inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(inputs[0]->get_shape());
auto w_desc = make_tensor(inputs[1]->get_shape());
auto x_desc = make_tensor(inputs[0]);
auto w_desc = make_tensor(inputs[1]);
auto y_desc = make_tensor(output_shape);
std::size_t workspace_size = 0;
@@ -56,31 +57,44 @@ shape miopen_convolution::compile(context& ctx,
&workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]->get_shape()));
auto w = to_gpu(generate_argument(inputs[1]->get_shape()));
auto x = to_gpu(generate_argument(inputs[0]));
auto w = to_gpu(generate_argument(inputs[1]));
auto y = allocate_gpu(output_shape);
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
miopenConvAlgoPerf_t perf;
miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
algo = perf.fwd_algo;
auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("Find convolution failed");
handle = ctx.get_stream().get_miopen();
algo = perf.fwd_algo;
return shape{shape::int8_type, {perf.memory}};
}
void miopen_convolution::finalize(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
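// Re-run the algorithm search in compile() only when the stream's MIOpen handle
// has changed since the last compile.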
if(handle == ctx.get_stream().get_miopen())
return;
// TODO: Check that workspace hasn't changed
compile(ctx, output_shape, std::move(inputs));
}
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
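// nary() maps the lambda elementwise over the inputs on the given HIP stream;
// to_hip_type (device/types.hpp) converts each element to a HIP-compatible type
// before calling ::acos. The other new device wrappers below follow the same pattern.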
void acos(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::acos(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/device/add.hpp>
#include <migraph/gpu/device/nary.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -22,5 +22,5 @@ void add(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/device/add_relu.hpp>
#include <migraph/gpu/device/nary.hpp>
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -27,5 +27,5 @@ void add_relu(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/asin.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void asin(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::asin(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/atan.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void atan(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::atan(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/shape.hpp>
#include <migraph/argument.hpp>
#include <migraph/gpu/device/concat.hpp>
#include <migraph/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/concat.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument concat(hipStream_t stream,
const migraph::shape& output_shape,
std::vector<migraph::argument> args,
const migraphx::shape& output_shape,
std::vector<migraphx::argument> args,
std::vector<std::size_t> offsets)
{
for(std::size_t l = 0; l < args.size() - 1; l++)
@@ -34,5 +34,5 @@ argument concat(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/device/contiguous.hpp>
#include <migraph/gpu/device/nary.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -14,5 +14,5 @@ void contiguous(hipStream_t stream, argument result, argument arg)
} // namespace device
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/cos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void cos(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::cos(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/cosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void cosh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::cosh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx