"include/ck/utility/array_multi_index.hpp" did not exist on "fcbb978828b308d8c367a3eeaebee485a61b548c"
Commit 95b5efeb authored by Khalique Ahmed's avatar Khalique Ahmed
Browse files

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into gemm_to_conv

parents 5e22d800 25a0e433
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/stringutils.hpp> #include <migraphx/stringutils.hpp>
#include <migraphx/value.hpp> #include <migraphx/value.hpp>
#include <migraphx/dyn_output.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -62,9 +63,9 @@ struct unary : op_name<Derived> ...@@ -62,9 +63,9 @@ struct unary : op_name<Derived>
value attributes() const { return base_attributes(); } value attributes() const { return base_attributes(); }
shape compute_shape(std::vector<shape> inputs) const shape compute_shape(std::vector<shape> inputs) const
{ {
check_shapes{inputs, static_cast<const Derived&>(*this)}.has(1); check_shapes{inputs, static_cast<const Derived&>(*this), true}.has(1);
auto s = inputs.at(0); auto s = inputs.at(0);
if(s.scalar()) if(s.dynamic() or s.scalar())
{ {
return s; return s;
} }
...@@ -78,9 +79,9 @@ struct unary : op_name<Derived> ...@@ -78,9 +79,9 @@ struct unary : op_name<Derived>
} }
} }
argument compute(const shape& output_shape, std::vector<argument> args) const argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
{ {
argument result{output_shape}; argument result{dyn_out.computed_shape};
result.visit([&](auto output) { result.visit([&](auto output) {
args[0].visit([&](auto input) { args[0].visit([&](auto input) {
std::transform(input.begin(), std::transform(input.begin(),
......
...@@ -32,6 +32,8 @@ ...@@ -32,6 +32,8 @@
#include <utility> #include <utility>
#include <unordered_map> #include <unordered_map>
#include <migraphx/reflect.hpp> #include <migraphx/reflect.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/streamutils.hpp> #include <migraphx/streamutils.hpp>
#include <migraphx/normalize_attributes.hpp> #include <migraphx/normalize_attributes.hpp>
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
...@@ -199,9 +201,12 @@ auto compute_op(rank<1>, ...@@ -199,9 +201,12 @@ auto compute_op(rank<1>,
context& ctx, context& ctx,
const shape& output_shape, const shape& output_shape,
const std::vector<argument>& input) const std::vector<argument>& input)
-> decltype(x.compute(auto_any_cast(ctx), output_shape, input)) -> decltype(x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output_shape, input)),
input))
{ {
return x.compute(auto_any_cast(ctx), output_shape, input); return x.compute(
auto_any_cast(ctx), make_compute_output_shape(pack(x, output_shape, input)), input);
} }
template <class T> template <class T>
...@@ -220,9 +225,9 @@ compute_op(const T& x, context& ctx, const shape& output_shape, const std::vecto ...@@ -220,9 +225,9 @@ compute_op(const T& x, context& ctx, const shape& output_shape, const std::vecto
template <class T> template <class T>
auto compute_op(rank<1>, const T& x, const shape& output_shape, const std::vector<argument>& input) auto compute_op(rank<1>, const T& x, const shape& output_shape, const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input)) -> decltype(x.compute(make_compute_output_shape(pack(x, output_shape, input)), input))
{ {
return x.compute(output_shape, input); return x.compute(make_compute_output_shape(pack(x, output_shape, input)), input);
} }
template <class T> template <class T>
...@@ -244,9 +249,11 @@ auto compute_op(rank<1>, ...@@ -244,9 +249,11 @@ auto compute_op(rank<1>,
const shape& output, const shape& output,
const std::vector<argument>& inputs, const std::vector<argument>& inputs,
const std::vector<module_ref>& module_args, const std::vector<module_ref>& module_args,
F f) -> decltype(x.compute(output, inputs, module_args, f)) F f)
-> decltype(
x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f))
{ {
return x.compute(output, inputs, module_args, f); return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f);
} }
template <class T, class F> template <class T, class F>
...@@ -278,9 +285,17 @@ auto compute_op(rank<4>, ...@@ -278,9 +285,17 @@ auto compute_op(rank<4>,
const shape& output, const shape& output,
const std::vector<argument>& inputs, const std::vector<argument>& inputs,
const std::vector<module_ref>& module_args, const std::vector<module_ref>& module_args,
F f) -> decltype(x.compute(auto_any_cast(ctx), output, inputs, module_args, f)) F f) -> decltype(x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output, inputs)),
inputs,
module_args,
f))
{ {
return x.compute(auto_any_cast(ctx), output, inputs, module_args, f); return x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output, inputs)),
inputs,
module_args,
f);
} }
template <class T, class F> template <class T, class F>
...@@ -290,9 +305,11 @@ auto compute_op(rank<3>, ...@@ -290,9 +305,11 @@ auto compute_op(rank<3>,
const shape& output, const shape& output,
const std::vector<argument>& inputs, const std::vector<argument>& inputs,
const std::vector<module_ref>& module_args, const std::vector<module_ref>& module_args,
F f) -> decltype(x.compute(output, inputs, module_args, f)) F f)
-> decltype(
x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f))
{ {
return x.compute(output, inputs, module_args, f); return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f);
} }
template <class T, class F> template <class T, class F>
...@@ -302,9 +319,10 @@ auto compute_op(rank<2>, ...@@ -302,9 +319,10 @@ auto compute_op(rank<2>,
const shape& output, const shape& output,
const std::vector<argument>& inputs, const std::vector<argument>& inputs,
const std::vector<module_ref>&, const std::vector<module_ref>&,
F) -> decltype(x.compute(output, inputs)) F)
-> decltype(x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs))
{ {
return x.compute(output, inputs); return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs);
} }
template <class T, class F> template <class T, class F>
...@@ -314,9 +332,12 @@ auto compute_op(rank<1>, ...@@ -314,9 +332,12 @@ auto compute_op(rank<1>,
const shape& output, const shape& output,
const std::vector<argument>& inputs, const std::vector<argument>& inputs,
const std::vector<module_ref>&, const std::vector<module_ref>&,
F) -> decltype(x.compute(auto_any_cast(ctx), output, inputs)) F) -> decltype(x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output, inputs)),
inputs))
{ {
return x.compute(auto_any_cast(ctx), output, inputs); return x.compute(
auto_any_cast(ctx), make_compute_output_shape(pack(x, output, inputs)), inputs);
} }
template <class T, class F> template <class T, class F>
...@@ -348,7 +369,8 @@ auto is_context_free_op(rank<1>, ...@@ -348,7 +369,8 @@ auto is_context_free_op(rank<1>,
const T& x, const T& x,
const shape& output_shape, const shape& output_shape,
const std::vector<argument>& input) const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input), std::true_type{}); -> decltype(x.compute(make_compute_output_shape(pack(x, output_shape, input)), input),
std::true_type{});
template <class T> template <class T>
auto is_context_free_op(rank<0>, const T&, const shape&, const std::vector<argument>&) auto is_context_free_op(rank<0>, const T&, const shape&, const std::vector<argument>&)
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
#include <migraphx/file_buffer.hpp> #include <migraphx/file_buffer.hpp>
#include <migraphx/json.hpp> #include <migraphx/json.hpp>
#include <migraphx/msgpack.hpp> #include <migraphx/msgpack.hpp>
#include <migraphx/file_buffer.hpp>
#include <fstream> #include <fstream>
namespace migraphx { namespace migraphx {
......
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
#include <migraphx/pass_manager.hpp> #include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
#include <migraphx/register_target.hpp> #include <migraphx/register_target.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/json.hpp> #include <migraphx/json.hpp>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
......
...@@ -30,7 +30,7 @@ namespace onnx { ...@@ -30,7 +30,7 @@ namespace onnx {
void recalc_conv_attributes(value& v, size_t kdims) void recalc_conv_attributes(value& v, size_t kdims)
{ {
if(not(v["padding"].size() == kdims or v["padding"].size() == kdims * 2)) if(v["padding"].size() != kdims and v["padding"].size() != kdims * 2)
{ {
v["padding"].resize(kdims); v["padding"].resize(kdims);
std::fill_n(v["padding"].begin(), kdims, 0); std::fill_n(v["padding"].begin(), kdims, 0);
......
...@@ -46,9 +46,6 @@ ...@@ -46,9 +46,6 @@
#include <migraphx/iterator_for.hpp> #include <migraphx/iterator_for.hpp>
#include <migraphx/dfor.hpp> #include <migraphx/dfor.hpp>
#include <migraphx/ranges.hpp> #include <migraphx/ranges.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/op/rnn_variable_seq_lens.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
......
...@@ -827,7 +827,7 @@ MIGRAPHX_PRED_MATCHER(horiz_conv_dot, instruction_ref ins) ...@@ -827,7 +827,7 @@ MIGRAPHX_PRED_MATCHER(horiz_conv_dot, instruction_ref ins)
}; };
auto dots = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("dot")); auto dots = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("dot"));
auto convs = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("convolution")); auto convs = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("convolution"));
return not(dots < 2 and convs < 2); return (dots >= 2 or convs >= 2);
} }
struct find_conv_dot_horiz_fusion struct find_conv_dot_horiz_fusion
......
...@@ -215,55 +215,6 @@ struct cpu_pad ...@@ -215,55 +215,6 @@ struct cpu_pad
}; };
MIGRAPHX_REGISTER_OP(cpu_pad) MIGRAPHX_REGISTER_OP(cpu_pad)
struct leaky_relu_op
{
op::leaky_relu op;
std::string name() const { return "cpu::leaky_relu"; }
auto fcn() const
{
auto a = op.alpha;
return [a](auto x) { return x > 0 ? x : x * a; };
}
};
template <typename Op>
struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
{
cpu_unary2() = default;
template <class T>
cpu_unary2(T pop) : op(Op{std::move(pop)})
{
}
Op op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op.op, f);
}
std::string name() const { return op.name(); }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(1);
const auto& s = inputs.at(0);
return {s.type(), s.lens()};
}
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
assert(input.get_shape().standard());
std::transform(input.begin(), input.end(), output.begin(), op.fcn());
});
return result;
}
};
template struct cpu_unary2<leaky_relu_op>;
struct cpu_rnn_var_sl_last_output struct cpu_rnn_var_sl_last_output
{ {
op::rnn_var_sl_last_output op; op::rnn_var_sl_last_output op;
......
...@@ -41,7 +41,6 @@ ...@@ -41,7 +41,6 @@
#include <migraphx/rewrite_quantization.hpp> #include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_rnn.hpp> #include <migraphx/rewrite_rnn.hpp>
#include <migraphx/schedule.hpp> #include <migraphx/schedule.hpp>
#include <migraphx/memory_coloring.hpp>
#include <migraphx/simplify_algebra.hpp> #include <migraphx/simplify_algebra.hpp>
#include <migraphx/simplify_qdq.hpp> #include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_reshapes.hpp> #include <migraphx/simplify_reshapes.hpp>
......
...@@ -85,7 +85,6 @@ add_library(migraphx_gpu ...@@ -85,7 +85,6 @@ add_library(migraphx_gpu
compile_hip_code_object.cpp compile_hip_code_object.cpp
compiler.cpp compiler.cpp
device_name.cpp device_name.cpp
elu.cpp
fuse_mlir.cpp fuse_mlir.cpp
fuse_ops.cpp fuse_ops.cpp
gather.cpp gather.cpp
...@@ -98,7 +97,6 @@ add_library(migraphx_gpu ...@@ -98,7 +97,6 @@ add_library(migraphx_gpu
logsoftmax.cpp logsoftmax.cpp
loop.cpp loop.cpp
lrn.cpp lrn.cpp
leaky_relu.cpp
mlir.cpp mlir.cpp
multinomial.cpp multinomial.cpp
nonzero.cpp nonzero.cpp
...@@ -143,9 +141,7 @@ register_migraphx_gpu_ops(hip_ ...@@ -143,9 +141,7 @@ register_migraphx_gpu_ops(hip_
register_migraphx_gpu_ops(miopen_ register_migraphx_gpu_ops(miopen_
abs abs
contiguous contiguous
elu
int8_conv_pack int8_conv_pack
leaky_relu
lrn lrn
pooling pooling
) )
......
...@@ -144,9 +144,8 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over) ...@@ -144,9 +144,8 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
std::size_t compute_block_size(std::size_t n, std::size_t max_block_size) std::size_t compute_block_size(std::size_t n, std::size_t max_block_size)
{ {
const std::size_t min_block_size = 64; const std::size_t min_block_size = 64;
const std::size_t base_block_size = 32; auto block_size = (((n - 1) / min_block_size + 1)) * min_block_size;
auto block_size = (((n - 1) / base_block_size + 1)) * base_block_size;
return std::min(std::max(min_block_size, block_size), max_block_size); return std::min(std::max(min_block_size, block_size), max_block_size);
} }
......
...@@ -183,8 +183,8 @@ argument register_on_gpu(const argument& arg) ...@@ -183,8 +183,8 @@ argument register_on_gpu(const argument& arg)
{ {
auto arg_shared = arg.share(); auto arg_shared = arg.share();
auto p = register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes()); auto p = register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes());
return {arg_shared.get_shape(), auto s = arg_shared.get_shape();
[p, a = std::move(arg_shared)]() mutable { return get_device_ptr(p.get()); }}; return {s, [p, a = std::move(arg_shared)]() mutable { return get_device_ptr(p.get()); }};
} }
argument to_gpu(const argument& arg, bool host) argument to_gpu(const argument& arg, bool host)
......
...@@ -21,44 +21,80 @@ ...@@ -21,44 +21,80 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#include <migraphx/gpu/elu.hpp> #include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/float_equal.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
shape miopen_elu::compute_shape(const std::vector<shape>& inputs) const using namespace migraphx::gpu::gen; // NOLINT
static const char* const pointwise_kernel = R"__migraphx__(
#include <migraphx/kernels/pad.hpp>
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/ops.hpp>
#include <args.hpp>
namespace migraphx {
extern "C" {
__global__ void pad_kernel(void* input_p, void* output_p)
{ {
check_shapes{inputs, *this}.has(2).not_broadcasted(); auto offsets = index_ints<${offsets}>{};
return inputs.at(1); auto idx = make_index();
make_tensors()(input_p, output_p)([&](auto input, auto output) {
pad(idx, offsets, input, output, ${pad_val});
});
}
} }
argument miopen_elu::compute(context& ctx, } // namespace migraphx
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1]; )__migraphx__";
}
void miopen_elu::finalize(context&, const shape&, const std::vector<shape>&) struct pad_compiler : compiler<pad_compiler>
{ {
ad = make_elu(op.alpha); std::vector<std::string> names() const { return {"pad"}; }
}
operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
{
hip_compile_options options;
options.inputs = inputs;
options.output = inputs.back();
options.virtual_inputs = reduce_dims(inputs);
options.kernel_name = "pad_kernel";
options.set_launch_params(v, compute_global_for(ctx, inputs.at(1).elements()));
auto pad_val = v.get("value", 0.f);
auto pad_val_string = to_string(pad_val);
if(float_equal(pad_val, std::numeric_limits<float>::lowest()))
pad_val_string = "lowest{}";
if(float_equal(pad_val, std::numeric_limits<float>::max()))
pad_val_string = "highest{}";
auto padding = v.at("pads").to_vector<int64_t>();
auto input_lens = inputs.front().lens();
std::vector<size_t> offsets(input_lens.size());
std::copy(padding.begin(), padding.begin() + offsets.size(), offsets.begin());
auto src = interpolate_string(
pointwise_kernel,
{{"pad_val", to_string(pad_val_string)}, {"offsets", to_string_range(offsets)}});
return compile_hip_code_object(src, options);
}
compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
{
return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
}
};
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
...@@ -79,9 +79,10 @@ struct scatternd_compiler : compiler<scatternd_compiler> ...@@ -79,9 +79,10 @@ struct scatternd_compiler : compiler<scatternd_compiler>
{ {
assert(starts_with(op.name(), "scatternd_")); assert(starts_with(op.name(), "scatternd_"));
auto reduction = op.name().substr(10); auto reduction = op.name().substr(10);
return insert(compile_op(ctx, return insert(compile_op(
to_shapes({ins->inputs().begin() + 1, ins->inputs().end()}), ctx,
{{"reduction", reduction}})); to_shapes(std::vector<instruction_ref>{ins->inputs().begin() + 1, ins->inputs().end()}),
{{"reduction", reduction}}));
} }
compiler_replace insert(const operation& op) const compiler_replace insert(const operation& op) const
......
...@@ -21,44 +21,43 @@ ...@@ -21,44 +21,43 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#ifndef MIGRAPHX_GUARD_RTGLIB_LEAKY_RELU_HPP #ifndef MIGRAPHX_GUARD_KERNELS_PAD_HPP
#define MIGRAPHX_GUARD_RTGLIB_LEAKY_RELU_HPP #define MIGRAPHX_GUARD_KERNELS_PAD_HPP
#include <migraphx/op/leaky_relu.hpp> #include <migraphx/kernels/shape.hpp>
#include <migraphx/shape.hpp> #include <migraphx/kernels/index.hpp>
#include <migraphx/reflect.hpp> #include <migraphx/kernels/algorithm.hpp>
#include <migraphx/gpu/miopen.hpp> #include <migraphx/kernels/ranges.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context; template <class Offsets, class Input, class Output, class PadVal>
__device__ void pad(const index& idx,
struct miopen_leaky_relu const Offsets& offsets,
const Input& input,
Output& output,
const PadVal& pad_val)
{ {
op::leaky_relu op; auto output_shape = output.get_shape();
shared<activation_descriptor> ad; idx.global_stride(output_shape.elements(), [&](auto i) {
// 1. get current multi-index for output
template <class Self, class F> // 2. get the size of the input to determine input boundaries
static auto reflect(Self& self, F f) // 3. compute the corresponding multi-index for input by accounting for offsets
{ // 4. if current multi-index is within offsets or input's new multi-index is out of bounds,
return migraphx::reflect(self.op, f); // use pad value instead of input's value
} auto multi = output_shape.multi(i);
auto input_bounds = input.get_shape().lens;
auto input_idx = multi - offsets;
auto range_multi = range(multi.size());
if(any_of(range_multi.begin(), range_multi.end(), [&](auto j) {
return multi[j] < offsets[j] or input_idx[j] >= input_bounds[j];
}))
output[multi] = pad_val;
else
output[multi] = input[input_idx];
});
}
std::string name() const { return "gpu::leaky_relu"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
void finalize(context&, const shape&, const std::vector<shape>&);
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
#endif #endif
...@@ -21,20 +21,29 @@ ...@@ -21,20 +21,29 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#ifndef MIGRAPHX_GUARD_KERNELS_RANGES_HPP
#define MIGRAPHX_GUARD_KERNELS_RANGES_HPP
#include "verify_program.hpp" #include <migraphx/kernels/iota_iterator.hpp>
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
struct test_elu : verify_program<test_elu> namespace migraphx {
template <class Iterator>
struct iterator_range
{ {
migraphx::program create_program() const Iterator start;
{ Iterator last;
migraphx::program p;
auto* mm = p.get_main_module(); constexpr Iterator begin() const { return start; }
auto x = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3}});
mm->add_instruction(migraphx::make_op("leaky_relu", {{"alpha", 1.0}}), x); constexpr Iterator end() const { return last; }
return p;
}
}; };
constexpr iterator_range<iota_iterator> range(diff_int start, diff_int last)
{
return {{start, {}}, {last, {}}};
}
constexpr iterator_range<iota_iterator> range(diff_int last) { return range(0, last); }
} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_RANGES_HPP
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/leaky_relu.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_leaky_relu::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_leaky_relu::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
void miopen_leaky_relu::finalize(context&, const shape&, const std::vector<shape>&)
{
ad = make_leaky_relu(op.alpha);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -95,14 +95,11 @@ struct miopen_apply ...@@ -95,14 +95,11 @@ struct miopen_apply
add_extend_op("argmax"); add_extend_op("argmax");
add_extend_op("argmin"); add_extend_op("argmin");
add_extend_op("elu");
add_extend_op("gather"); add_extend_op("gather");
add_extend_op("leaky_relu");
add_extend_op("logsoftmax"); add_extend_op("logsoftmax");
add_extend_op("lrn"); add_extend_op("lrn");
add_extend_op("multinomial"); add_extend_op("multinomial");
add_extend_op("nonzero"); add_extend_op("nonzero");
add_extend_op("pad");
add_extend_op("pooling"); add_extend_op("pooling");
add_extend_op("prefix_scan_sum"); add_extend_op("prefix_scan_sum");
add_extend_op("reverse"); add_extend_op("reverse");
......
...@@ -196,6 +196,7 @@ struct mlir_program ...@@ -196,6 +196,7 @@ struct mlir_program
MlirType make_tensor(const shape& s) const MlirType make_tensor(const shape& s) const
{ {
assert(s.standard());
std::vector<int64_t> lens(s.lens().begin(), s.lens().end()); std::vector<int64_t> lens(s.lens().begin(), s.lens().end());
return mlirRankedTensorTypeGet( return mlirRankedTensorTypeGet(
lens.size(), lens.data(), make_type(s.type()), mlirAttributeGetNull()); lens.size(), lens.data(), make_type(s.type()), mlirAttributeGetNull());
...@@ -371,7 +372,11 @@ struct mlir_program ...@@ -371,7 +372,11 @@ struct mlir_program
mlir_operation_state& add_results(const std::vector<shape>& outputs) mlir_operation_state& add_results(const std::vector<shape>& outputs)
{ {
auto x = prog->make_tensors(outputs); std::vector<shape> reshaped(outputs.size());
std::transform(outputs.begin(), outputs.end(), reshaped.begin(), [](const shape& r) {
return shape{r.type(), r.lens()};
});
auto x = prog->make_tensors(reshaped);
mlirOperationStateAddResults(&op_state, x.size(), x.data()); mlirOperationStateAddResults(&op_state, x.size(), x.data());
return *this; return *this;
} }
......
...@@ -141,12 +141,12 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti ...@@ -141,12 +141,12 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination{}, dead_code_elimination{},
pack_int8_args{}, pack_int8_args{},
dead_code_elimination{}, dead_code_elimination{},
adjust_allocation{gpu_allocation_model{}},
dead_code_elimination{},
fuse_ops{&ctx, options.fast_math}, fuse_ops{&ctx, options.fast_math},
dead_code_elimination{}, dead_code_elimination{},
replace_allocate{gpu_allocation_model{}, options.offload_copy}, replace_allocate{gpu_allocation_model{}, options.offload_copy},
dead_code_elimination{}, dead_code_elimination{},
adjust_allocation{gpu_allocation_model{}},
dead_code_elimination{},
compile_ops{&ctx}, compile_ops{&ctx},
dead_code_elimination{}, dead_code_elimination{},
write_literals{&ctx}, write_literals{&ctx},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment