Commit 95b5efeb authored by Khalique Ahmed's avatar Khalique Ahmed
Browse files

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into gemm_to_conv

parents 5e22d800 25a0e433
......@@ -30,6 +30,7 @@
#include <migraphx/argument.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/value.hpp>
#include <migraphx/dyn_output.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -62,9 +63,9 @@ struct unary : op_name<Derived>
value attributes() const { return base_attributes(); }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, static_cast<const Derived&>(*this)}.has(1);
check_shapes{inputs, static_cast<const Derived&>(*this), true}.has(1);
auto s = inputs.at(0);
if(s.scalar())
if(s.dynamic() or s.scalar())
{
return s;
}
......@@ -78,9 +79,9 @@ struct unary : op_name<Derived>
}
}
argument compute(const shape& output_shape, std::vector<argument> args) const
argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
{
argument result{output_shape};
argument result{dyn_out.computed_shape};
result.visit([&](auto output) {
args[0].visit([&](auto input) {
std::transform(input.begin(),
......
......@@ -32,6 +32,8 @@
#include <utility>
#include <unordered_map>
#include <migraphx/reflect.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/normalize_attributes.hpp>
#include <migraphx/argument.hpp>
......@@ -199,9 +201,12 @@ auto compute_op(rank<1>,
context& ctx,
const shape& output_shape,
const std::vector<argument>& input)
-> decltype(x.compute(auto_any_cast(ctx), output_shape, input))
-> decltype(x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output_shape, input)),
input))
{
return x.compute(auto_any_cast(ctx), output_shape, input);
return x.compute(
auto_any_cast(ctx), make_compute_output_shape(pack(x, output_shape, input)), input);
}
template <class T>
......@@ -220,9 +225,9 @@ compute_op(const T& x, context& ctx, const shape& output_shape, const std::vecto
template <class T>
auto compute_op(rank<1>, const T& x, const shape& output_shape, const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input))
-> decltype(x.compute(make_compute_output_shape(pack(x, output_shape, input)), input))
{
return x.compute(output_shape, input);
return x.compute(make_compute_output_shape(pack(x, output_shape, input)), input);
}
template <class T>
......@@ -244,9 +249,11 @@ auto compute_op(rank<1>,
const shape& output,
const std::vector<argument>& inputs,
const std::vector<module_ref>& module_args,
F f) -> decltype(x.compute(output, inputs, module_args, f))
F f)
-> decltype(
x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f))
{
return x.compute(output, inputs, module_args, f);
return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f);
}
template <class T, class F>
......@@ -278,9 +285,17 @@ auto compute_op(rank<4>,
const shape& output,
const std::vector<argument>& inputs,
const std::vector<module_ref>& module_args,
F f) -> decltype(x.compute(auto_any_cast(ctx), output, inputs, module_args, f))
F f) -> decltype(x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output, inputs)),
inputs,
module_args,
f))
{
return x.compute(auto_any_cast(ctx), output, inputs, module_args, f);
return x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output, inputs)),
inputs,
module_args,
f);
}
template <class T, class F>
......@@ -290,9 +305,11 @@ auto compute_op(rank<3>,
const shape& output,
const std::vector<argument>& inputs,
const std::vector<module_ref>& module_args,
F f) -> decltype(x.compute(output, inputs, module_args, f))
F f)
-> decltype(
x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f))
{
return x.compute(output, inputs, module_args, f);
return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f);
}
template <class T, class F>
......@@ -302,9 +319,10 @@ auto compute_op(rank<2>,
const shape& output,
const std::vector<argument>& inputs,
const std::vector<module_ref>&,
F) -> decltype(x.compute(output, inputs))
F)
-> decltype(x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs))
{
return x.compute(output, inputs);
return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs);
}
template <class T, class F>
......@@ -314,9 +332,12 @@ auto compute_op(rank<1>,
const shape& output,
const std::vector<argument>& inputs,
const std::vector<module_ref>&,
F) -> decltype(x.compute(auto_any_cast(ctx), output, inputs))
F) -> decltype(x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output, inputs)),
inputs))
{
return x.compute(auto_any_cast(ctx), output, inputs);
return x.compute(
auto_any_cast(ctx), make_compute_output_shape(pack(x, output, inputs)), inputs);
}
template <class T, class F>
......@@ -348,7 +369,8 @@ auto is_context_free_op(rank<1>,
const T& x,
const shape& output_shape,
const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input), std::true_type{});
-> decltype(x.compute(make_compute_output_shape(pack(x, output_shape, input)), input),
std::true_type{});
template <class T>
auto is_context_free_op(rank<0>, const T&, const shape&, const std::vector<argument>&)
......
......@@ -25,7 +25,6 @@
#include <migraphx/file_buffer.hpp>
#include <migraphx/json.hpp>
#include <migraphx/msgpack.hpp>
#include <migraphx/file_buffer.hpp>
#include <fstream>
namespace migraphx {
......
......@@ -34,7 +34,6 @@
#include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/json.hpp>
#include <iostream>
#include <sstream>
......
......@@ -30,7 +30,7 @@ namespace onnx {
void recalc_conv_attributes(value& v, size_t kdims)
{
if(not(v["padding"].size() == kdims or v["padding"].size() == kdims * 2))
if(v["padding"].size() != kdims and v["padding"].size() != kdims * 2)
{
v["padding"].resize(kdims);
std::fill_n(v["padding"].begin(), kdims, 0);
......
......@@ -46,9 +46,6 @@
#include <migraphx/iterator_for.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/op/rnn_variable_seq_lens.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......
......@@ -827,7 +827,7 @@ MIGRAPHX_PRED_MATCHER(horiz_conv_dot, instruction_ref ins)
};
auto dots = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("dot"));
auto convs = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("convolution"));
return not(dots < 2 and convs < 2);
return (dots >= 2 or convs >= 2);
}
struct find_conv_dot_horiz_fusion
......
......@@ -215,55 +215,6 @@ struct cpu_pad
};
MIGRAPHX_REGISTER_OP(cpu_pad)
struct leaky_relu_op
{
op::leaky_relu op;
std::string name() const { return "cpu::leaky_relu"; }
auto fcn() const
{
auto a = op.alpha;
return [a](auto x) { return x > 0 ? x : x * a; };
}
};
template <typename Op>
struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
{
cpu_unary2() = default;
template <class T>
cpu_unary2(T pop) : op(Op{std::move(pop)})
{
}
Op op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op.op, f);
}
std::string name() const { return op.name(); }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(1);
const auto& s = inputs.at(0);
return {s.type(), s.lens()};
}
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
assert(input.get_shape().standard());
std::transform(input.begin(), input.end(), output.begin(), op.fcn());
});
return result;
}
};
template struct cpu_unary2<leaky_relu_op>;
struct cpu_rnn_var_sl_last_output
{
op::rnn_var_sl_last_output op;
......
......@@ -41,7 +41,6 @@
#include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/schedule.hpp>
#include <migraphx/memory_coloring.hpp>
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_reshapes.hpp>
......
......@@ -85,7 +85,6 @@ add_library(migraphx_gpu
compile_hip_code_object.cpp
compiler.cpp
device_name.cpp
elu.cpp
fuse_mlir.cpp
fuse_ops.cpp
gather.cpp
......@@ -98,7 +97,6 @@ add_library(migraphx_gpu
logsoftmax.cpp
loop.cpp
lrn.cpp
leaky_relu.cpp
mlir.cpp
multinomial.cpp
nonzero.cpp
......@@ -143,9 +141,7 @@ register_migraphx_gpu_ops(hip_
register_migraphx_gpu_ops(miopen_
abs
contiguous
elu
int8_conv_pack
leaky_relu
lrn
pooling
)
......
......@@ -144,9 +144,8 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
std::size_t compute_block_size(std::size_t n, std::size_t max_block_size)
{
const std::size_t min_block_size = 64;
const std::size_t base_block_size = 32;
auto block_size = (((n - 1) / base_block_size + 1)) * base_block_size;
const std::size_t min_block_size = 64;
auto block_size = (((n - 1) / min_block_size + 1)) * min_block_size;
return std::min(std::max(min_block_size, block_size), max_block_size);
}
......
......@@ -183,8 +183,8 @@ argument register_on_gpu(const argument& arg)
{
auto arg_shared = arg.share();
auto p = register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes());
return {arg_shared.get_shape(),
[p, a = std::move(arg_shared)]() mutable { return get_device_ptr(p.get()); }};
auto s = arg_shared.get_shape();
return {s, [p, a = std::move(arg_shared)]() mutable { return get_device_ptr(p.get()); }};
}
argument to_gpu(const argument& arg, bool host)
......
......@@ -21,44 +21,80 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/elu.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/float_equal.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_elu::compute_shape(const std::vector<shape>& inputs) const
using namespace migraphx::gpu::gen; // NOLINT
static const char* const pointwise_kernel = R"__migraphx__(
#include <migraphx/kernels/pad.hpp>
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/ops.hpp>
#include <args.hpp>
namespace migraphx {
extern "C" {
__global__ void pad_kernel(void* input_p, void* output_p)
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
auto offsets = index_ints<${offsets}>{};
auto idx = make_index();
make_tensors()(input_p, output_p)([&](auto input, auto output) {
pad(idx, offsets, input, output, ${pad_val});
});
}
}
argument miopen_elu::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
} // namespace migraphx
return args[1];
}
)__migraphx__";
void miopen_elu::finalize(context&, const shape&, const std::vector<shape>&)
struct pad_compiler : compiler<pad_compiler>
{
ad = make_elu(op.alpha);
}
std::vector<std::string> names() const { return {"pad"}; }
operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
{
hip_compile_options options;
options.inputs = inputs;
options.output = inputs.back();
options.virtual_inputs = reduce_dims(inputs);
options.kernel_name = "pad_kernel";
options.set_launch_params(v, compute_global_for(ctx, inputs.at(1).elements()));
auto pad_val = v.get("value", 0.f);
auto pad_val_string = to_string(pad_val);
if(float_equal(pad_val, std::numeric_limits<float>::lowest()))
pad_val_string = "lowest{}";
if(float_equal(pad_val, std::numeric_limits<float>::max()))
pad_val_string = "highest{}";
auto padding = v.at("pads").to_vector<int64_t>();
auto input_lens = inputs.front().lens();
std::vector<size_t> offsets(input_lens.size());
std::copy(padding.begin(), padding.begin() + offsets.size(), offsets.begin());
auto src = interpolate_string(
pointwise_kernel,
{{"pad_val", to_string(pad_val_string)}, {"offsets", to_string_range(offsets)}});
return compile_hip_code_object(src, options);
}
compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
{
return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -79,9 +79,10 @@ struct scatternd_compiler : compiler<scatternd_compiler>
{
assert(starts_with(op.name(), "scatternd_"));
auto reduction = op.name().substr(10);
return insert(compile_op(ctx,
to_shapes({ins->inputs().begin() + 1, ins->inputs().end()}),
{{"reduction", reduction}}));
return insert(compile_op(
ctx,
to_shapes(std::vector<instruction_ref>{ins->inputs().begin() + 1, ins->inputs().end()}),
{{"reduction", reduction}}));
}
compiler_replace insert(const operation& op) const
......
......@@ -21,44 +21,43 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_LEAKY_RELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_LEAKY_RELU_HPP
#ifndef MIGRAPHX_GUARD_KERNELS_PAD_HPP
#define MIGRAPHX_GUARD_KERNELS_PAD_HPP
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/kernels/shape.hpp>
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/algorithm.hpp>
#include <migraphx/kernels/ranges.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct miopen_leaky_relu
template <class Offsets, class Input, class Output, class PadVal>
__device__ void pad(const index& idx,
const Offsets& offsets,
const Input& input,
Output& output,
const PadVal& pad_val)
{
op::leaky_relu op;
shared<activation_descriptor> ad;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
auto output_shape = output.get_shape();
idx.global_stride(output_shape.elements(), [&](auto i) {
// 1. get current multi-index for output
// 2. get the size of the input to determine input boundaries
// 3. compute the corresponding multi-index for input by accounting for offsets
// 4. if current multi-index is within offsets or input's new multi-index is out of bounds,
// use pad value instead of input's value
auto multi = output_shape.multi(i);
auto input_bounds = input.get_shape().lens;
auto input_idx = multi - offsets;
auto range_multi = range(multi.size());
if(any_of(range_multi.begin(), range_multi.end(), [&](auto j) {
return multi[j] < offsets[j] or input_idx[j] >= input_bounds[j];
}))
output[multi] = pad_val;
else
output[multi] = input[input_idx];
});
}
std::string name() const { return "gpu::leaky_relu"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
void finalize(context&, const shape&, const std::vector<shape>&);
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -21,20 +21,29 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_KERNELS_RANGES_HPP
#define MIGRAPHX_GUARD_KERNELS_RANGES_HPP
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/kernels/iota_iterator.hpp>
struct test_elu : verify_program<test_elu>
namespace migraphx {
template <class Iterator>
struct iterator_range
{
migraphx::program create_program() const
{
migraphx::program p;
auto* mm = p.get_main_module();
auto x = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3}});
mm->add_instruction(migraphx::make_op("leaky_relu", {{"alpha", 1.0}}), x);
return p;
}
Iterator start;
Iterator last;
constexpr Iterator begin() const { return start; }
constexpr Iterator end() const { return last; }
};
constexpr iterator_range<iota_iterator> range(diff_int start, diff_int last)
{
return {{start, {}}, {last, {}}};
}
constexpr iterator_range<iota_iterator> range(diff_int last) { return range(0, last); }
} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_RANGES_HPP
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/leaky_relu.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_leaky_relu::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_leaky_relu::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
void miopen_leaky_relu::finalize(context&, const shape&, const std::vector<shape>&)
{
ad = make_leaky_relu(op.alpha);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -95,14 +95,11 @@ struct miopen_apply
add_extend_op("argmax");
add_extend_op("argmin");
add_extend_op("elu");
add_extend_op("gather");
add_extend_op("leaky_relu");
add_extend_op("logsoftmax");
add_extend_op("lrn");
add_extend_op("multinomial");
add_extend_op("nonzero");
add_extend_op("pad");
add_extend_op("pooling");
add_extend_op("prefix_scan_sum");
add_extend_op("reverse");
......
......@@ -196,6 +196,7 @@ struct mlir_program
MlirType make_tensor(const shape& s) const
{
assert(s.standard());
std::vector<int64_t> lens(s.lens().begin(), s.lens().end());
return mlirRankedTensorTypeGet(
lens.size(), lens.data(), make_type(s.type()), mlirAttributeGetNull());
......@@ -371,7 +372,11 @@ struct mlir_program
mlir_operation_state& add_results(const std::vector<shape>& outputs)
{
auto x = prog->make_tensors(outputs);
std::vector<shape> reshaped(outputs.size());
std::transform(outputs.begin(), outputs.end(), reshaped.begin(), [](const shape& r) {
return shape{r.type(), r.lens()};
});
auto x = prog->make_tensors(reshaped);
mlirOperationStateAddResults(&op_state, x.size(), x.data());
return *this;
}
......
......@@ -141,12 +141,12 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination{},
pack_int8_args{},
dead_code_elimination{},
adjust_allocation{gpu_allocation_model{}},
dead_code_elimination{},
fuse_ops{&ctx, options.fast_math},
dead_code_elimination{},
replace_allocate{gpu_allocation_model{}, options.offload_copy},
dead_code_elimination{},
adjust_allocation{gpu_allocation_model{}},
dead_code_elimination{},
compile_ops{&ctx},
dead_code_elimination{},
write_literals{&ctx},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment