Commit 1398bcc1 authored by kahmed10, committed by mvermeulen

Add_clip fusion (#370)

* initial testing of add_clip fusion

* formatting

* clipped relu fusion

* formatting

* remove some executables, add fusion test

* formatting

* remove clipped_relu code

* fix clang-tidy

* revert changes to cmake files

* remove fusion from weight map

* formatting

* fix syntax error

* formatting

* fix syntax error

* fix syntax error

* formatting
parent 84a3f56e
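
In short, the pass replaces an elementwise add (or an already-fused triadd) followed by a clip with a single kernel that clamps the sum in one pass over the data. A scalar sketch of the intended semantics (reference only, names are illustrative and not part of the diff):

#include <algorithm>

// Reference semantics of the fused kernels: clamp the elementwise sum
// once instead of running an add kernel and then a clip kernel.
template <class T>
T add_clip_ref(T x, T y, T min, T max)
{
    return std::min(std::max(x + y, min), max);
}

template <class T>
T triadd_clip_ref(T x, T y, T z, T min, T max)
{
    return std::min(std::max(x + y + z, min), max);
}
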
@@ -16,6 +16,18 @@ void mul_add_relu(hipStream_t stream,
[](auto x, auto a, auto b) { return std::max<decltype(a * x + b)>(0, a * x + b); });
}
void add_clip(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const float max,
const float min)
{
nary(stream, result, arg1, arg2)([max, min](auto x, auto y) {
return std::min<decltype(x + y)>(std::max<decltype(x)>(min, x + y), max);
});
}
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
@@ -42,6 +54,19 @@ void add_tanh(hipStream_t stream,
nary(stream, result, arg1, arg2)([](auto x, auto y) { return ::tanh(to_hip_type(x + y)); });
}
void add_clip(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3,
const float max,
const float min)
{
nary(stream, result, arg1, arg2, arg3)([max, min](auto x, auto y, auto z) {
return std::min<decltype(x + y + z)>(std::max<decltype(x)>(min, x + y + z), max);
});
}
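
As a quick sanity check of the lambda above with ReLU6 parameters (min = 0, max = 6, the case the new test below exercises):

#include <algorithm>
#include <cassert>

int main()
{
    const float min = 0.0f, max = 6.0f; // ReLU6 bounds, as in the test below
    auto clip_sum = [max, min](float x, float y) {
        return std::min(std::max(min, x + y), max);
    };
    assert(clip_sum(2.5f, 4.0f) == 6.0f);  // 6.5 clamped to max
    assert(clip_sum(-3.0f, 1.0f) == 0.0f); // -2.0 clamped to min
    assert(clip_sum(1.0f, 2.0f) == 3.0f);  // inside the range, unchanged
}
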
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
......
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/mul_add.hpp>
@@ -8,6 +9,7 @@
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/array.hpp>
#include <migraphx/op/clip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
@@ -170,6 +172,65 @@ struct hip_triadd
}
};
struct hip_triadd_clip
{
op::clip op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return op::clip::reflect(self.op, f);
}
std::string name() const { return "hip::triadd_clip"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(4);
return inputs.front();
}
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::add_clip(ctx.get_stream().get(),
args.at(3),
args.at(0),
args.at(1),
args.at(2),
op.max_val,
op.min_val);
return args.at(3);
}
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
struct hip_add_clip
{
op::clip op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return op::clip::reflect(self.op, f);
}
std::string name() const { return "hip::add_clip"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(3);
return inputs.front();
}
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::add_clip(
ctx.get_stream().get(), args.at(2), args.at(0), args.at(1), op.max_val, op.min_val);
return args.at(2);
}
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
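
Both ops follow the GPU target's preallocated-output convention: the last input is the destination buffer, compute() writes into it and returns it, and output_alias() reports its index so no fresh allocation is made. A simplified host-side stand-in (hypothetical types, reference only, not the real argument/shape interfaces):

#include <algorithm>
#include <cstddef>
#include <vector>

// Simplified stand-in for hip::add_clip's calling convention: the caller
// supplies `out`, the op fills it and returns it, and output_alias()
// points at that final argument.
struct add_clip_sketch
{
    float max_val;
    float min_val;

    std::vector<float>& compute(const std::vector<float>& x,
                                const std::vector<float>& y,
                                std::vector<float>& out) const
    {
        for(std::size_t i = 0; i < out.size(); i++)
            out[i] = std::min(std::max(min_val, x[i] + y[i]), max_val);
        return out; // aliases the caller's buffer, no new allocation
    }

    std::ptrdiff_t output_alias(std::size_t nargs) const
    {
        return static_cast<std::ptrdiff_t>(nargs) - 1; // last input is the output
    }
};
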
struct hip_triadd_relu : ternary_device<hip_triadd_relu, &device::add_relu>
{
};
@@ -253,6 +314,35 @@ void move_standard_front(std::vector<instruction_ref>& args)
std::swap(*it, args.front());
}
struct find_add_clip
{
auto matcher() const
{
return match::name(std::unordered_set<std::string>{"gpu::clip", "gpu::clipped_relu"})(
match::arg(0)(match::any_of(match::name("gpu::add"),
match::name("hip::triadd"),
match::any_of[match::inputs()](match::standard_shape()))
.bind("add")));
}
void apply(program& p, match::matcher_result r) const
{
auto add_ins = r.instructions["add"];
auto ins = r.result;
auto&& op = any_cast<gpu::hip_clip>(ins->get_operator()).op;
auto args = add_ins->inputs();
move_standard_front(args);
move_broadcasted_back(args);
// Use the allocation from the clip operator
args.back() = ins->inputs().back();
if(add_ins->name() == "gpu::add")
p.replace_instruction(ins, hip_add_clip{op}, args);
else if(add_ins->name() == "hip::triadd")
p.replace_instruction(ins, hip_triadd_clip{op}, args);
}
};
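
Sketched as before/after program IR (op names from this diff; value and buffer names are illustrative), the rewrite is:

// before: two kernel launches, two output buffers
//   sum = gpu::add(x, y, alloc0)
//   out = gpu::clip(sum, alloc1)        // uses op.min_val / op.max_val
// after find_add_clip: one launch, writing into clip's buffer
//   out = hip::add_clip(x, y, alloc1)
// the triadd case fuses the same way:
//   hip::triadd + gpu::clip  ->  hip::triadd_clip
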
struct find_add_unary
{
std::string op_name;
@@ -490,7 +580,8 @@ struct find_conv_bias
context* ctx = nullptr;
auto matcher() const
{
return conv_bias(match::none_of(match::output(match::name("gpu::relu"))));
return conv_bias(match::none_of(
match::output(match::name(std::unordered_set<std::string>{"gpu::relu"}))));
}
void apply(program& p, match::matcher_result r) const
@@ -521,7 +612,8 @@ void fuse_ops::apply(program& p) const
find_mul_add_relu{},
find_add_unary{"gpu::relu", hip_add_relu{}, hip_triadd_relu{}},
find_add_unary{"gpu::sigmoid", hip_add_sigmoid{}, hip_triadd_sigmoid{}},
find_add_unary{"gpu::tanh", hip_add_tanh{}, hip_triadd_tanh{}}
find_add_unary{"gpu::tanh", hip_add_tanh{}, hip_triadd_tanh{}},
find_add_clip{}
);
// clang-format on
}
......
@@ -17,6 +17,13 @@ void mul_add_relu(hipStream_t stream,
const argument& arg2,
const argument& arg3);
void add_clip(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
float max,
float min);
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
@@ -32,6 +39,14 @@ void add_tanh(hipStream_t stream,
const argument& arg1,
const argument& arg2);
void add_clip(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3,
float max,
float min);
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
......
@@ -95,16 +95,13 @@ void schedule_model::record(program& p, instruction_ref ins, std::size_t wait_id
static std::unordered_map<std::string, std::size_t> create_weight_map()
{
-    return {
-        {"hip::load_literal", 0},
-        {"hip::allocate", 0},
-        {"gpu::convolution", 4},
-        {"gpu::conv_bias_relu", 4},
-        {"gpu::pooling", 2},
-        {"gpu::gemm", 2},
-        {"gpu::concat", 1},
-        {"hip::add_relu", 2},
-    };
+    return {{"hip::load_literal", 0},
+            {"hip::allocate", 0},
+            {"gpu::convolution", 4},
+            {"gpu::conv_bias_relu", 4},
+            {"gpu::pooling", 2},
+            {"gpu::gemm", 2},
+            {"gpu::concat", 1}};
}
static const std::unordered_map<std::string, std::size_t>& weight_map()
......
@@ -822,6 +822,27 @@ struct test_conv_relu_half : verify_program<test_conv_relu_half>
}
};
struct test_conv_bias_clipped_relu : verify_program<test_conv_bias_clipped_relu>
{
migraphx::program create_program() const
{
migraphx::program p;
auto input =
p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3}});
auto weights =
p.add_parameter("w", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3}});
auto l0 = migraphx::literal{migraphx::shape{migraphx::shape::float_type, {4}},
{2.0f, 2.0f, 2.0f, 2.0f}};
auto bias = p.add_literal(l0);
auto conv = p.add_instruction(migraphx::op::convolution{}, input, weights);
auto bcast_add =
p.add_instruction(migraphx::op::broadcast{1, conv->get_shape().lens()}, bias);
auto bias_add = p.add_instruction(migraphx::op::add{}, conv, bcast_add);
p.add_instruction(migraphx::op::clip{6.0f, 0.0f}, bias_add);
return p;
}
};
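
One way to see the fusion end to end (a sketch assuming the MIGraphX host API of this era; it reuses the test struct defined above):

#include <iostream>
#include <migraphx/gpu/target.hpp>

// Compile the test program for the GPU target and print its IR; after
// fuse_ops runs, the bias add + clip pair should appear as hip::add_clip.
int main()
{
    migraphx::program p = test_conv_bias_clipped_relu{}.create_program();
    p.compile(migraphx::gpu::target{});
    std::cout << p << std::endl;
}
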
struct test_add_relu : verify_program<test_add_relu>
{
migraphx::program create_program() const
......