Commit eea36256 authored by turneram's avatar turneram
Browse files

Add attention and layernorm ops

parent 31906785
......@@ -116,6 +116,7 @@ register_migraphx_ops(
if_op
im2col
isnan
layernorm
leaky_relu
less
load
......
#ifndef MIGRAPHX_GUARD_OPERATORS_LAYERNORMALIZATION_HPP
#define MIGRAPHX_GUARD_OPERATORS_LAYERNORMALIZATION_HPP
#include <array>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <migraphx/par_for.hpp>
#include <cmath>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct layernorm
{
float epsilon = 1e-3;
int64_t axis = -1;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.epsilon, "epsilon"), f(self.axis, "axis"));
}
value attributes() const
{
value normalize;
normalize["axis"] = value::array{normalize_attribute::include_min};
return {{"normalize_axes", normalize}};
}
std::string name() const { return "layernorm"; }
shape normalize_compute_shape(std::vector<shape> inputs) const
{
if(inputs.size() == 2)
{
if(inputs.at(1).lens().front() != inputs.front().lens().at(axis))
MIGRAPHX_THROW("LAYERNORM: weights have wrong shape");
}
if(inputs.size() == 3)
{
if(inputs.at(2).lens().front() != inputs.front().lens().at(axis))
MIGRAPHX_THROW("LAYERNORM: bias has wrong shape");
}
return inputs.front();
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
auto x_lens = args.front().get_shape().lens();
auto norm_count = std::accumulate(
x_lens.begin(), x_lens.begin() + axis, std::size_t{1}, std::multiplies<std::size_t>());
auto norm_size = std::accumulate(
x_lens.begin() + axis, x_lens.end(), std::size_t{1}, std::multiplies<std::size_t>());
/* std::vector<std::size_t> mean_inv_std_dev_dim(x_lens.size());
for (std::size_t i = 0; i < x_lens.size(); ++i)
{
if (i < axis)
mean_inv_std_dev_dim.at(i) = x_lens[i];
else
mean_inv_std_dev_dim.at(i) = 1;
} */
visit_all(result, args[0], args[1], args[2])(
[&](auto output, auto data, auto weights, auto bias) {
par_for(norm_count, [&](auto idx) {
auto offset = idx * norm_size;
double mean = 0;
double mean_square = 0;
for(std::size_t i = 0; i < norm_size; ++i)
{
mean += data[offset + i];
mean_square += data[offset + i] * data[offset + i];
}
mean /= norm_size;
mean_square = sqrt(mean_square / norm_size - mean * mean + epsilon);
for(std::size_t i = 0; i < norm_size; ++i)
{
output[offset + i] =
(data[offset + i] - mean) / mean_square;
/* if(args.size() == 3)
output[offset + i] =
(data[offset + i] - mean) / mean_square * weights[i] + bias[i];
else
output[offset + i] =
(data[offset + i] - mean) / mean_square * weights[i]; */
// scale and bias handled by onnx parser
}
});
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -42,6 +42,7 @@
#include <migraphx/op/if_op.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/isnan.hpp>
#include <migraphx/op/layernorm.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/less.hpp>
#include <migraphx/op/load.hpp>
......
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
struct parse_attention : op_parser<parse_attention>
{
std::vector<op_desc> operators() const { return {{"Attention"}}; }
instruction_ref parse(const op_desc& /*opd*/,
const onnx_parser& parser,
onnx_parser::node_info info,
const std::vector<instruction_ref>& args) const
{
auto input = args[0];
auto weights = args[1];
auto bias = args[2];
auto mask_index = args[3];
instruction_ref past;
instruction_ref extra_add_qk;
bool is_past = false;
bool is_extra_add_qk = false;
if (args.size() > 4)
{
past = args[4];
is_past = true;
}
if (args.size() == 6)
{
is_extra_add_qk = true;
extra_add_qk = args[5];
}
// ORT default is 12
std::size_t num_heads = 12;
if(contains(info.attributes, "num_heads"))
num_heads = info.attributes.at("num_heads").i();
// input shape: (batch_size, sequence_length, input_hidden_size)
auto input_lens = input->get_shape().lens();
auto batch_size = input_lens.at(0);
auto sequence_length = input_lens.at(1);
auto input_hidden_size = input_lens.at(2);
// bias shape: (3 * hidden_size)
auto bias_lens = bias->get_shape().lens();
auto hidden_size = bias_lens.at(0) / 3;
auto head_size = hidden_size / num_heads;
int past_sequence_length = 0;
// GetPresent
// Input and output shapes:
// past : (2, batch_size, num_heads, past_sequence_length, head_size)
// present : (2, batch_size, num_heads, past_sequence_length + sequence_length, head_size)
std::vector<std::size_t> present_lens{2, batch_size, num_heads, sequence_length, head_size};
if (is_past)
{
auto past_lens = past->get_shape().lens();
past_sequence_length = past_lens.at(3);
present_lens[3] += past_lens[3];
}
// Use GEMM for fully connection.
auto m = batch_size * sequence_length;
auto n = bias_lens.front();
auto k = input_hidden_size;
// Bias shape is (N), broadcast using B(N, M) = 1 * bias(N, 1) x ones(1, M) + 0 * B.
auto bias_type = bias->get_shape().type();
std::vector<float> ones_vec(m, 1);
std::vector<std::size_t> ones_lens{1, m};
auto ones = info.add_literal(migraphx::literal{migraphx::shape{bias_type, ones_lens}, ones_vec});
bias = info.add_instruction(migraphx::make_op("reshape", {{"dims", {n, 1}}}), bias);
auto gemm_1 = info.add_instruction(migraphx::make_op("dot"), bias, ones/* info.make_contiguous(mb_bias), info.make_contiguous(ones) */);
gemm_1 = info.add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), gemm_1);
/// ORT: Gemm, note that ROCM assumes col-major, so result(N, M) = 1 * weights x input + 1 x B.
/// Assume row-major => results(N, M) = 1 * input x weights + 1 x B ?
auto input_sq = info.add_instruction(migraphx::make_op("reshape", {{"dims", {batch_size * sequence_length, hidden_size}}}), input);
auto gemm_2 = info.add_instruction(migraphx::make_op("dot"), input_sq, weights);
auto add_gemms = info.add_instruction(migraphx::make_op("add"), gemm_1, gemm_2);
// LaunchAttentionKernel:
// LaunchTransQkv
// input should be BxSx3xNxH => scratch3: 3xBxNxSxH
add_gemms = info.add_instruction(migraphx::make_op("reshape", {{"dims", {batch_size, sequence_length, 3, num_heads, head_size}}}), add_gemms);
std::vector<std::size_t> qkv_perm{2, 0, 3, 1, 4};
auto transqkv = info.add_instruction(migraphx::make_op("transpose", {{"permutation", qkv_perm}}), add_gemms);
// now scratch3 has Q, K, V: each has size BxNxSxH
// => transqkv has shape 3xBxNxSxH
auto batches = batch_size * num_heads;
auto size_per_batch = sequence_length * head_size;
auto total_size = batches * size_per_batch;
auto q_t = info.add_instruction(migraphx::make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), transqkv);
auto k_t = info.add_instruction(migraphx::make_op("slice", {{"axes", {0}}, {"starts", {1}}, {"ends", {2}}}), transqkv);
auto v_t = info.add_instruction(migraphx::make_op("slice", {{"axes", {0}}, {"starts", {2}}, {"ends", {3}}}), transqkv);
q_t = info.add_instruction(make_op("squeeze", {{"axes", {0}}}), q_t);
k_t = info.add_instruction(make_op("squeeze", {{"axes", {0}}}), k_t);
v_t = info.add_instruction(make_op("squeeze", {{"axes", {0}}}), v_t);
if (is_past)
{
k_t = info.add_instruction(migraphx::make_op("concat", {{"axis", 3}}), past, k_t);
v_t = info.add_instruction(migraphx::make_op("slice", {{"axes", {0}}, {"starts", {1}}, {"ends", {3}}}), k_t);
}
// Raw attention mask could be 2D (BxS) or 3D (BxSxS*) or 4D(Bx1xMxM), where M is the max sequence length.
auto mask_index_lens = mask_index->get_shape().lens();
bool use_raw_attention_mask = mask_index_lens.size() >= 2;
// compute Q*K' (as K'*Q), scaled by 1/sqrt(H) and store in scratch1: BxNxSxS*
// Q: BxNxSxH, K (present_k): BxNxS*xH, Q*K': BxNxSxS*
const float rsqrt_head_size = 1.f / sqrt(static_cast<float>(head_size));
const int all_sequence_length = past_sequence_length + sequence_length;
const int temp_matrix_size = sequence_length * all_sequence_length;
// For raw attention mask, the scalar if 1/sqrt(H) is moved to softmax computation.
const float alpha = use_raw_attention_mask ? 1.0 : rsqrt_head_size;
// K{B,N,S,H} -> K'{B,N,H,S}
k_t = info.add_instruction(make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), k_t);
auto gemm3 = info.add_instruction(migraphx::make_op("dot"), q_t, k_t);
if (is_extra_add_qk)
gemm3 = info.add_instruction(make_op("add"), gemm3, extra_add_qk);
auto alpha_lit = info.add_instruction(
migraphx::make_op("multibroadcast", {{"out_lens", gemm3->get_shape().lens()}}),
info.add_literal(migraphx::literal{migraphx::shape{gemm3->get_shape().type()}, {alpha}}));
gemm3 = info.add_instruction(migraphx::make_op("mul"), gemm3, info.make_contiguous(alpha_lit));
// apply softmax and store result P to scratch2: BxNxSxS*
std::vector<float> mask(batch_size*num_heads*sequence_length*all_sequence_length, 0);
if (false and mask_index_lens.size() >= 2)
{
}
else if (false and mask_index_lens.size() == 1)
{
}
// else => no mask
auto softmax = info.add_instruction(migraphx::make_op("softmax", {{"axis", 3}}), gemm3);
// compute P*V (as V*P), and store in scratch3: BxNxSxH
auto gemm4 = info.add_instruction(migraphx::make_op("dot"), softmax, v_t);
// scratch3 is BxNxSxH, transpose to output BxSxNxH
gemm4 = info.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 2, 1, 3}}}), gemm4);
gemm4 = info.add_instruction(make_op("reshape", {{"dims", {batch_size, sequence_length, num_heads * head_size}}}), info.make_contiguous(gemm4));
return gemm4;
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/op/layernorm.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
struct parse_layernorm : op_parser<parse_layernorm>
{
std::vector<op_desc> operators() const { return {{"LayerNormalization"}}; }
instruction_ref parse(const op_desc& /*opd*/,
const onnx_parser& parser,
onnx_parser::node_info info,
const std::vector<instruction_ref>& args) const
{
float epsilon = 1e-3f;
int64_t axis = -1;
if(contains(info.attributes, "epsilon"))
{
epsilon = parser.parse_value(info.attributes.at("epsilon")).at<float>();
}
if(contains(info.attributes, "axis"))
{
epsilon = parser.parse_value(info.attributes.at("axis")).at<int64_t>();
}
auto layernorm = info.add_instruction(make_op("layernorm", {{"epsilon", epsilon}, {"axis", axis}}), args.front());
if (args.size() == 3)
{
layernorm = info.add_instruction(make_op("mul"), layernorm, args.at(1));
layernorm = info.add_instruction(make_op("add"), layernorm, args.at(2));
}
return layernorm;
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -148,6 +148,7 @@ add_library(migraphx_gpu
int8_conv_pack.cpp
int8_gemm_pack.cpp
kernel.cpp
layernorm.cpp
lowering.cpp
logsoftmax.cpp
loop.cpp
......@@ -203,6 +204,7 @@ register_migraphx_gpu_ops(hip_
floor
gather
greater
layernorm
less
log
logsoftmax
......
......@@ -12,6 +12,8 @@ namespace device {
void layernorm(hipStream_t stream, const argument& result, const argument& arg1);
//void layernorm(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2, const argument& arg3, const int64_t axis);
void triadd_layernorm(hipStream_t stream,
const argument& result,
const argument& arg1,
......
#ifndef MIGRAPHX_GUARD_RTGLIB_LAYERNORM_HPP
#define MIGRAPHX_GUARD_RTGLIB_LAYERNORM_HPP
#include <migraphx/op/layernorm.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct hip_layernorm
{
op::layernorm op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::layernorm"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
void finalize(context&, const shape&, const std::vector<shape>&);
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#include <migraphx/gpu/layernorm.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/layernorm.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_layernorm::compute_shape(std::vector<shape> inputs) const
{
std::cout << "compute shape" << std::endl;
inputs.pop_back();
return op.normalize_compute_shape(inputs);
}
argument hip_layernorm::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
/* if (args.size() == 3)
{
auto n_dim = args.front().get_shape().lens().size();
auto tuned_axis = tune_axis(n_dim, op.axis, op.name());
device::layernorm(ctx.get_stream().get(), args.back(), args[0], args[1], args[2], tuned_axis);
}
else */
std::cout << "calling device::ln" << std::endl;
{
device::layernorm(ctx.get_stream().get(), args.back(), args[0]);
std::cout << "called device::ln" << std::endl;
}
return args.back();
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -11,6 +11,7 @@
#include <migraphx/op/dot.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/if_op.hpp>
#include <migraphx/op/layernorm.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/pooling.hpp>
......@@ -29,6 +30,7 @@
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/greater.hpp>
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/layernorm.hpp>
#include <migraphx/gpu/leaky_relu.hpp>
#include <migraphx/gpu/less.hpp>
#include <migraphx/gpu/logical_and.hpp>
......@@ -139,6 +141,7 @@ struct miopen_apply
add_generic_op("exp");
add_generic_op("floor");
add_generic_op("greater");
add_generic_op("layernorm");
add_generic_op("less");
add_generic_op("log");
add_generic_op("logical_and");
......@@ -386,6 +389,10 @@ struct miopen_apply
apply_map.emplace(op_name, [=](instruction_ref ins) {
auto output = insert_allocation(ins, ins->get_shape());
std::vector<instruction_ref> refs = ins->inputs();
if (op_name == "layernorm")
{
std::cout << "layernorm op" << std::endl;
}
refs.push_back(output);
return mod->replace_instruction(ins, make_op(gpu_name), refs);
......
......@@ -2593,6 +2593,22 @@ def layernorm_test():
bias_add], [x, scale, bias], [y], [pow_tensor, epsilon_tensor])
@onnx_test
def layernorm_op_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 2, 3])
w = helper.make_tensor_value_info('w', TensorProto.FLOAT, [3])
b = helper.make_tensor_value_info('b', TensorProto.FLOAT, [3])
output = helper.make_tensor_value_info('output', TensorProto.FLOAT,
[1, 2, 3])
node = onnx.helper.make_node('LayerNormalization',
inputs=['x', 'w', 'b'],
outputs=["output"],
epsilon=1e-5)
return ([node], [x, w, b], [output])
@onnx_test
def leaky_relu_test():
x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])
......
......@@ -446,6 +446,34 @@ TEST_CASE(instance_norm_3d_test)
EXPECT(migraphx::verify_range(result_vector, gold));
}
TEST_CASE(layernorm_op_test)
{
migraphx::program p = migraphx::parse_onnx("layernorm_op_test.onnx");
p.compile(migraphx::ref::target{});
migraphx::shape sx{migraphx::shape::float_type, {1, 2, 3}};
migraphx::shape swb{migraphx::shape::float_type, {3}};
std::vector<float> x_vec{1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
std::vector<float> w_vec{1.0, 1.0, 1.0};
std::vector<float> b_vec{0.0, 0.0, 0.0};
migraphx::parameter_map pp;
pp["x"] = migraphx::argument(sx, x_vec.data());
pp["w"] = migraphx::argument(swb, w_vec.data());
pp["b"] = migraphx::argument(swb, b_vec.data());
auto result = p.eval(pp).back();
std::vector<float> result_vector(6);
result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); });
std::vector<float> gold{-1.22474f, 0.0f, 1.22474f, -1.22474f, 0.0f, 1.22474f};
for(auto&& i : result_vector)
std::cout << i << ", ";
std::cout << std::endl;
EXPECT(migraphx::verify_range(result_vector, gold));
}
TEST_CASE(lessorequal_test)
{
migraphx::program p = migraphx::parse_onnx("lessorequal_test.onnx");
......
......@@ -2238,6 +2238,28 @@ TEST_CASE(imagescaler_test)
EXPECT(migraphx::verify_range(results_vector, gold));
}
TEST_CASE(layernorm_test)
{
migraphx::program p;
auto* mm = p.get_main_module();
migraphx::shape sx{migraphx::shape::float_type, {1, 2, 3}};
migraphx::shape swb{migraphx::shape::float_type, {3}};
std::vector<float> x_vec{1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
auto x = mm->add_literal(migraphx::literal{sx, x_vec});
auto w = mm->add_literal(migraphx::literal{swb, {1.0, 1.0, 1.0}});
auto b = mm->add_literal(migraphx::literal{swb, {0.0, 0.0, 0.0}});
mm->add_instruction(migraphx::make_op("layernorm", {{"epsilon", 1e-5}}), x, w, b);
p.compile(migraphx::ref::target{});
auto result = p.eval({}).back();
std::vector<float> results_vector(1 * 2 * 3);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
std::vector<float> gold = {-1.22474f, 0.0f, 1.22474f, -1.22474f, 0.0f, 1.22474f};
for(auto&& i : results_vector)
std::cout << i << ", ";
std::cout << std::endl;
EXPECT(migraphx::verify_range(results_vector, gold));
}
TEST_CASE(leaky_relu_test)
{
migraphx::program p;
......
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
struct test_layernorm : verify_program<test_layernorm>
{
migraphx::program create_program() const
{
migraphx::program p;
auto* mm = p.get_main_module();
auto x = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {1, 1, 768}});
auto w = mm->add_parameter("w", migraphx::shape{migraphx::shape::float_type, {768}});
auto b = mm->add_parameter("b", migraphx::shape{migraphx::shape::float_type, {768}});
mm->add_instruction(migraphx::make_op("layernorm", {{"axis", -1}}), x);
p.debug_print();
return p;
}
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment