Add GroupNorm and LayerNorm onnx parsing (#2242)

52c74f0e · Attila Dusnoki · GitHub · f25606f9 · 52c74f0e · 52c74f0e
Unverified Commit 52c74f0e authored Oct 17, 2023 by Attila Dusnoki Committed by GitHub Oct 17, 2023
20 changed files
--- a/src/onnx/parse_groupnorm.cpp
+++ b/src/onnx/parse_groupnorm.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/onnx/op_parser.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/instruction.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace onnx {
+// Parser for the ONNX GroupNormalization operator.
+//
+// The input x (N x C x D1 x ... x Dn) is reshaped to
+// N x num_groups x (C / num_groups) x D1 x ... x Dn, normalized over every axis
+// that follows the group axis, multiplied/offset by one scale and one bias value
+// per group, and finally reshaped back to the dimensions of x.
+struct parse_groupnorm : op_parser<parse_groupnorm>
+{
+    std::vector<op_desc> operators() const { return {{"GroupNormalization"}}; }
+    // Inputs (exactly three): x, scale, bias.
+    // Attributes: "num_groups" (required, positive, must divide the channel count)
+    //             "epsilon"    (optional, defaults to 1e-5)
+    // Raises MIGRAPHX_THROW when the attribute is missing or not positive, the
+    // input count is not 3, x has rank <= 2, the channel count is not divisible by
+    // num_groups, or scale/bias are not 1-D tensors of length num_groups.
+    // Returns the instruction that yields the normalized tensor, shaped like x.
+    instruction_ref parse(const op_desc& /*opd*/,
+                          const onnx_parser& parser,
+                          const onnx_parser::node_info& info,
+                          std::vector<instruction_ref> args) const
+    {
+        float epsilon = 1e-5f;
+        if(contains(info.attributes, "epsilon"))
+        {
+            epsilon = parser.parse_value(info.attributes.at("epsilon")).at<float>();
+        }
+        if(not contains(info.attributes, "num_groups"))
+        {
+            MIGRAPHX_THROW("PARSE_GROUPNORM: num_groups must be available");
+        }
+        // Read the attribute as a signed value so that zero and negative inputs are
+        // rejected explicitly instead of wrapping to a huge unsigned number or
+        // reaching the modulo/division below with a zero divisor.
+        const auto num_groups_attr =
+            parser.parse_value(info.attributes.at("num_groups")).at<int64_t>();
+        if(num_groups_attr <= 0)
+        {
+            MIGRAPHX_THROW("PARSE_GROUPNORM: num_groups must be a positive integer");
+        }
+        const auto num_groups = static_cast<size_t>(num_groups_attr);
+        if(args.size() != 3)
+        {
+            MIGRAPHX_THROW("PARSE_GROUPNORM: invalid input count");
+        }
+        auto x     = args.at(0);
+        auto scale = args.at(1);
+        auto bias  = args.at(2);
+        auto x_shape = x->get_shape();
+        auto x_dtype = x_shape.type();
+        auto x_dims  = x_shape.lens();
+        // At least one dimension is required after N and C
+        if(x_shape.ndim() <= 2)
+        {
+            MIGRAPHX_THROW("PARSE_GROUPNORM: invalid input shape");
+        }
+        const auto c = x_dims.at(1);
+        if(c % num_groups != 0)
+        {
+            MIGRAPHX_THROW(
+                "PARSE_GROUPNORM: num_groups should be a divisor of the number of channels");
+        }
+        const auto group_size = c / num_groups;
+        if(scale->get_shape().ndim() != 1 or scale->get_shape().lens().at(0) != num_groups)
+        {
+            MIGRAPHX_THROW("PARSE_GROUPNORM: scale tensor shape should be num_groups");
+        }
+        if(bias->get_shape().ndim() != 1 or bias->get_shape().lens().at(0) != num_groups)
+        {
+            MIGRAPHX_THROW("PARSE_GROUPNORM: bias tensor shape should be num_groups");
+        }
+        // Original shape: N x C x D1 x ... x Dn
+        // New shape: N x num_groups x C // num_groups x D1 x ... x Dn
+        std::vector<size_t> dims = {x_dims.at(0), num_groups, group_size};
+        std::copy(x_dims.begin() + 2, x_dims.end(), std::back_inserter(dims));
+        auto x_reshaped = info.add_instruction(make_op("reshape", {{"dims", dims}}), x);
+        // Reduction axes: everything after the group axis of the reshaped tensor,
+        // i.e. C // num_groups x D1 x ... x Dn (indices 2 .. rank - 1)
+        std::vector<size_t> axes(dims.size() - 2);
+        std::iota(axes.begin(), axes.end(), 2);
+        // y = (x - mean) * rsqrt(variance + epsilon) * scale + bias
+        // mean = reduce_mean(axes, x)
+        // variance = reduce_mean(axes, (x - mean)^2)
+        auto mean = info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), x_reshaped);
+        auto x_sub_mean    = info.add_common_op("sub", x_reshaped, mean);
+        auto x_sqdiff_mean = info.add_common_op("sqdiff", x_reshaped, mean);
+        auto variance =
+            info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), x_sqdiff_mean);
+        // For half precision inputs, raise a too-small epsilon to 1e-7
+        epsilon =
+            (x_dtype == migraphx::shape::half_type and std::abs(epsilon) < 1e-7) ? 1e-7 : epsilon;
+        auto eps     = info.add_literal(migraphx::literal{migraphx::shape{x_dtype}, {epsilon}});
+        auto var_eps = info.add_common_op("add", variance, eps);
+        auto rsqrt   = info.add_instruction(make_op("rsqrt"), var_eps);
+        auto result  = info.add_common_op("mul", x_sub_mean, rsqrt);
+        // scale and bias hold one value per group, so they are broadcast along the
+        // group axis (axis 1) of the reshaped tensor
+        auto scale_bcast =
+            info.add_instruction(make_op("broadcast", {{"axis", 1}, {"out_lens", dims}}), scale);
+        auto bias_bcast =
+            info.add_instruction(make_op("broadcast", {{"axis", 1}, {"out_lens", dims}}), bias);
+        auto scaled = info.add_instruction(make_op("mul"), result, scale_bcast);
+        auto y      = info.add_instruction(make_op("add"), scaled, bias_bcast);
+        // Restore the caller-visible N x C x D1 x ... x Dn layout
+        return info.add_instruction(make_op("reshape", {{"dims", x_dims}}), y);
+    }
+};
+} // namespace onnx
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/onnx/parse_layernorm.cpp
+++ b/src/onnx/parse_layernorm.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/onnx/op_parser.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/instruction.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace onnx {
+// Parser for the ONNX LayerNormalization operator.
+//
+// Normalizes x over the dimensions [axis, rank) and applies scale and, when
+// given, bias:
+//   y = (x - mean) * rsqrt(variance + epsilon) * scale + bias
+struct parse_layernorm : op_parser<parse_layernorm>
+{
+    std::vector<op_desc> operators() const { return {{"LayerNormalization"}}; }
+    // Inputs: x, scale and an optional bias (two or three inputs in total).
+    // Attributes: "axis" (optional, default -1, must lie in [-rank, rank)),
+    //             "epsilon" (optional, default 1e-5),
+    //             "stash_type" (not supported; a warning is printed and it is ignored).
+    // Raises MIGRAPHX_THROW on an invalid input count, when x has rank < 2, or
+    // when axis is out of range.
+    // Returns three instructions: {y, mean, rsqrt(variance + epsilon)}.
+    std::vector<instruction_ref> parse(const op_desc& /*opd*/,
+                                       const onnx_parser& parser,
+                                       const onnx_parser::node_info& info,
+                                       std::vector<instruction_ref> args) const
+    {
+        int64_t axis = -1;
+        if(contains(info.attributes, "axis"))
+        {
+            axis = parser.parse_value(info.attributes.at("axis")).at<int64_t>();
+        }
+        float epsilon = 1e-5f;
+        if(contains(info.attributes, "epsilon"))
+        {
+            epsilon = parser.parse_value(info.attributes.at("epsilon")).at<float>();
+        }
+        if(contains(info.attributes, "stash_type"))
+        {
+            std::cerr << "WARNING: LAYERNORM does not support stash_type, it will be ignored.\n";
+        }
+        if(args.size() < 2 or args.size() > 3)
+        {
+            MIGRAPHX_THROW("PARSE_LAYERNORM: invalid input count");
+        }
+        auto x               = args.at(0);
+        auto scale           = args.at(1);
+        const bool skip_bias = args.size() == 2;
+        auto x_shape         = x->get_shape();
+        auto x_dtype         = x_shape.type();
+        int64_t x_rank       = x_shape.ndim();
+        if(x_rank < 2)
+        {
+            MIGRAPHX_THROW("PARSE_LAYERNORM: invalid input shape");
+        }
+        // If rank(X) is r, axis' allowed range is [-r, r)
+        if(axis < -x_rank or axis >= x_rank)
+        {
+            MIGRAPHX_THROW("PARSE_LAYERNORM: invalid axis");
+        }
+        // y = (x - mean) * rsqrt(variance + epsilon) * scale + bias
+        // mean = reduce_mean({D1, D2, ... Dk}, x)
+        // variance = reduce_mean({D1, D2, ... Dk}, (x - mean)^2)
+        // axis can be negative; after this line it is the number of leading
+        // dimensions that are NOT normalized
+        axis = axis < 0 ? axis + x_rank : axis;
+        // Normalized axes: axis, axis + 1, ..., rank - 1
+        std::vector<int64_t> axes(x_rank - axis);
+        std::iota(axes.begin(), axes.end(), axis);
+        auto mean          = info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), x);
+        auto x_sub_mean    = info.add_common_op("sub", x, mean);
+        auto x_sqdiff_mean = info.add_common_op("sqdiff", x, mean);
+        auto variance =
+            info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), x_sqdiff_mean);
+        // For half precision inputs, raise a too-small epsilon to 1e-7
+        epsilon =
+            (x_dtype == migraphx::shape::half_type and std::abs(epsilon) < 1e-7) ? 1e-7 : epsilon;
+        auto eps     = info.add_literal(migraphx::literal{migraphx::shape{x_dtype}, {epsilon}});
+        auto var_eps = info.add_common_op("add", variance, eps);
+        auto rsqrt   = info.add_instruction(make_op("rsqrt"), var_eps);
+        auto result  = info.add_common_op("mul", x_sub_mean, rsqrt);
+        // When leading dimensions are skipped (axis > 0), scale/bias are broadcast
+        // to the dimensions of x starting at `axis`; otherwise they are used as-is.
+        auto x_dims        = x_shape.lens();
+        auto broadcast_arg = [&](instruction_ref ins) {
+            if(axis > 0)
+            {
+                return info.add_instruction(
+                    make_op("broadcast", {{"axis", axis}, {"out_lens", x_dims}}), ins);
+            }
+            return ins;
+        };
+        auto scale_bcast = broadcast_arg(scale);
+        if(skip_bias)
+        {
+            // No bias input: never touch (or copy) an unset instruction handle
+            auto scaled = info.add_instruction(make_op("mul"), result, scale_bcast);
+            return {scaled, mean, rsqrt};
+        }
+        // Broadcast the bias before emitting the multiply so the instruction order
+        // is: scale broadcast, bias broadcast, mul, add
+        auto bias_bcast = broadcast_arg(args.at(2));
+        auto scaled     = info.add_instruction(make_op("mul"), result, scale_bcast);
+        auto y          = info.add_instruction(make_op("add"), scaled, bias_bcast);
+        return {y, mean, rsqrt};
+    }
+};
+} // namespace onnx
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/test/onnx/gen_onnx.py
+++ b/test/onnx/gen_onnx.py
@@ -2722,6 +2722,119 @@ def group_conv_test():
    return ([node], [x, y], [z])
+def group_norm_test(x_dims,
+                    scale_dims,
+                    bias_dims,
+                    y_dims,
+                    num_groups,
+                    eps_value=1e-5,
+                    dtype=TensorProto.FLOAT):
+    x = helper.make_tensor_value_info('x', dtype, x_dims)
+    scale = helper.make_tensor_value_info('scale', dtype, scale_dims)
+    bias = helper.make_tensor_value_info('bias', dtype, bias_dims)
+    y = helper.make_tensor_value_info('y', dtype, y_dims)
+    node = onnx.helper.make_node('GroupNormalization',
+                                 inputs=['x', 'scale', 'bias'],
+                                 outputs=['y'],
+                                 num_groups=num_groups,
+                                 epsilon=eps_value)
+    return ([node], [x, scale, bias], [y])
+@onnx_test()
+def group_norm_3d_test():
+    return group_norm_test([1, 4, 2], [2], [2], [1, 4, 2], 2)
+@onnx_test()
+def group_norm_3d_half_test():
+    return group_norm_test([1, 4, 2], [2], [2], [1, 4, 2],
+                           2,
+                           dtype=TensorProto.FLOAT16)
+@onnx_test()
+def group_norm_4d_test():
+    return group_norm_test([1, 4, 3, 3], [2], [2], [1, 4, 3, 3], 2)
+@onnx_test()
+def group_norm_4d_half_test():
+    return group_norm_test([1, 4, 3, 3], [2], [2], [1, 4, 3, 3],
+                           2,
+                           dtype=TensorProto.FLOAT16)
+@onnx_test()
+def group_norm_5d_test():
+    return group_norm_test([3, 3, 3, 3, 3], [1], [1], [3, 3, 3, 3, 3], 1)
+@onnx_test()
+def group_norm_5d_half_test():
+    return group_norm_test([3, 3, 3, 3, 3], [1], [1], [3, 3, 3, 3, 3],
+                           1,
+                           dtype=TensorProto.FLOAT16)
+@onnx_test()
+def group_norm_small_eps_half_test():
+    return group_norm_test([1, 4, 2], [2], [2], [1, 4, 2],
+                           2,
+                           eps_value=1e-12,
+                           dtype=TensorProto.FLOAT16)
+@onnx_test()
+def group_norm_invalid_num_groups_error_test():
+    return group_norm_test([1, 4, 3, 3], [2], [2], [1, 4, 3, 3], 3)
+@onnx_test()
+def group_norm_missing_attribute_error_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 4])
+    scale = helper.make_tensor_value_info('scale', TensorProto.FLOAT, [2])
+    bias = helper.make_tensor_value_info('bias', TensorProto.FLOAT, [2])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4])
+    node = onnx.helper.make_node('GroupNormalization',
+                                 inputs=['x', 'scale', 'bias'],
+                                 outputs=['y'])
+    return ([node], [x, scale, bias], [y])
+@onnx_test()
+def group_norm_invalid_input_count_error_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 4, 3, 3])
+    scale = helper.make_tensor_value_info('scale', TensorProto.FLOAT, [2])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4, 3, 3])
+    node = onnx.helper.make_node('GroupNormalization',
+                                 inputs=['x', 'scale'],
+                                 outputs=['y'],
+                                 num_groups=2)
+    return ([node], [x, scale], [y])
+@onnx_test()
+def group_norm_invalid_input_shape_error_test():
+    return group_norm_test([1, 4], [2], [2], [1, 4], 2)
+@onnx_test()
+def group_norm_invalid_scale_shape_test():
+    return group_norm_test([1, 4, 3, 3], [1], [2], [1, 4, 3, 3], 2)
+@onnx_test()
+def group_norm_invalid_bias_shape_test():
+    return group_norm_test([1, 4, 3, 3], [2], [3], [1, 4, 3, 3], 2)
 @onnx_test()
 def hardsigmoid_default_test():
    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 3, 4, 5])
@@ -3804,6 +3917,110 @@ def layernorm_test():
             bias_add], [x, scale, bias], [y], [pow_tensor, epsilon_tensor])
+def make_layer_norm(shape, axis, dtype=TensorProto.FLOAT):
+    norm_axis = axis + len(shape) if axis < 0 else axis
+    x = helper.make_tensor_value_info('x', dtype, shape)
+    scale = helper.make_tensor_value_info('scale', dtype, shape[norm_axis:])
+    bias = helper.make_tensor_value_info('bias', dtype, shape[norm_axis:])
+    y = helper.make_tensor_value_info('y', dtype, shape)
+    node = onnx.helper.make_node('LayerNormalization',
+                                 inputs=['x', 'scale', 'bias'],
+                                 outputs=['y'],
+                                 axis=axis)
+    return ([node], [x, scale, bias], [y])
+@onnx_test()
+def layer_norm_invalid_shape_error_test():
+    return make_layer_norm([3], 0)
+@onnx_test()
+def layer_norm_2d_axis_zero_test():
+    return make_layer_norm([3, 4], 0)
+@onnx_test()
+def layer_norm_2d_axis_one_test():
+    return make_layer_norm([3, 4], 1)
+@onnx_test()
+def layer_norm_2d_axis_minus_one_test():
+    return make_layer_norm([3, 4], -1)
+@onnx_test()
+def layer_norm_3d_test():
+    return make_layer_norm([1, 4, 2], -1)
+@onnx_test()
+def layer_norm_3d_half_test():
+    return make_layer_norm([1, 4, 2], -1, TensorProto.FLOAT16)
+@onnx_test()
+def layer_norm_4d_test():
+    return make_layer_norm([3, 3, 3, 3], -1)
+@onnx_test()
+def layer_norm_4d_half_test():
+    return make_layer_norm([3, 3, 3, 3], -1, TensorProto.FLOAT16)
+@onnx_test()
+def layer_norm_invalid_axis_error_test():
+    return make_layer_norm([1, 4, 2], 1000)
+@onnx_test()
+def layer_norm_invalid_minus_axis_error_test():
+    return make_layer_norm([1, 4, 2], -1000)
+@onnx_test()
+def layer_norm_invalid_input_count_error_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 2])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 2])
+    node = onnx.helper.make_node('LayerNormalization',
+                                 inputs=['x'],
+                                 outputs=['y'])
+    return ([node], [x], [y])
+@onnx_test()
+def layer_norm_without_bias_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 2])
+    scale = helper.make_tensor_value_info('scale', TensorProto.FLOAT, [2])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 2])
+    node = onnx.helper.make_node('LayerNormalization',
+                                 inputs=['x', 'scale'],
+                                 outputs=['y'])
+    return ([node], [x, scale], [y])
+@onnx_test()
+def layer_norm_small_eps_half_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [1, 2])
+    scale = helper.make_tensor_value_info('scale', TensorProto.FLOAT16, [2])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [1, 2])
+    node = onnx.helper.make_node('LayerNormalization',
+                                 inputs=['x', 'scale'],
+                                 outputs=['y'],
+                                 epsilon=1e-12)
+    return ([node], [x, scale], [y])
 @onnx_test()
 def leaky_relu_test():
    x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])

--- a/test/onnx/group_norm_3d_half_test.onnx
+++ b/test/onnx/group_norm_3d_half_test.onnx
+group_norm_3d_half_test:
+M
+x
+scale
+biasy"GroupNormalization*
+epsilon'7*
+num_groupsgroup_norm_3d_half_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_3d_test.onnx
+++ b/test/onnx/group_norm_3d_test.onnx
+	group_norm_3d_test:
+:
+x
+scale
+biasy"GroupNormalization*
+num_groupsgroup_norm_3d_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_4d_half_test.onnx
+++ b/test/onnx/group_norm_4d_half_test.onnx
+group_norm_4d_half_test:
+M
+x
+scale
+biasy"GroupNormalization*
+epsilon'7*
+num_groupsgroup_norm_4d_half_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_4d_test.onnx
+++ b/test/onnx/group_norm_4d_test.onnx
+	group_norm_4d_test:
+:
+x
+scale
+biasy"GroupNormalization*
+num_groupsgroup_norm_4d_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_5d_half_test.onnx
+++ b/test/onnx/group_norm_5d_half_test.onnx
+group_norm_5d_half_test:
+M
+x
+scale
+biasy"GroupNormalization*
+epsilon'7*
+num_groupsgroup_norm_5d_half_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_5d_test.onnx
+++ b/test/onnx/group_norm_5d_test.onnx
+	group_norm_5d_test:
+:
+x
+scale
+biasy"GroupNormalization*
+num_groupsgroup_norm_5d_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_invalid_bias_shape_test.onnx
+++ b/test/onnx/group_norm_invalid_bias_shape_test.onnx
+	"group_norm_invalid_bias_shape_test:
+:
+x
+scale
+biasy"GroupNormalization*
+num_groups"group_norm_invalid_bias_shape_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_invalid_input_count_error_test.onnx
+++ b/test/onnx/group_norm_invalid_input_count_error_test.onnx
+	)group_norm_invalid_input_count_error_test:
+4
+x
+scaley"GroupNormalization*
+num_groups)group_norm_invalid_input_count_error_testZ
+x
+Z
+scale
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_invalid_input_shape_error_test.onnx
+++ b/test/onnx/group_norm_invalid_input_shape_error_test.onnx
+	)group_norm_invalid_input_shape_error_test:
+:
+x
+scale
+biasy"GroupNormalization*
+num_groups)group_norm_invalid_input_shape_error_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_invalid_num_groups_error_test.onnx
+++ b/test/onnx/group_norm_invalid_num_groups_error_test.onnx
+	(group_norm_invalid_num_groups_error_test:
+:
+x
+scale
+biasy"GroupNormalization*
+num_groups(group_norm_invalid_num_groups_error_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_invalid_scale_shape_test.onnx
+++ b/test/onnx/group_norm_invalid_scale_shape_test.onnx
+	#group_norm_invalid_scale_shape_test:
+:
+x
+scale
+biasy"GroupNormalization*
+num_groups#group_norm_invalid_scale_shape_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_missing_attribute_error_test.onnx
+++ b/test/onnx/group_norm_missing_attribute_error_test.onnx
+	'group_norm_missing_attribute_error_test:
+'
+x
+scale
+biasy"GroupNormalization'group_norm_missing_attribute_error_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/group_norm_small_eps_half_test.onnx
+++ b/test/onnx/group_norm_small_eps_half_test.onnx
+group_norm_small_eps_half_test:
+M
+x
+scale
+biasy"GroupNormalization*
+epsilon̼+*
+num_groupsgroup_norm_small_eps_half_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/layer_norm_2d_axis_minus_one_test.onnx
+++ b/test/onnx/layer_norm_2d_axis_minus_one_test.onnx
+	!layer_norm_2d_axis_minus_one_test:
+=
+x
+scale
+biasy"LayerNormalization*
+axis!layer_norm_2d_axis_minus_one_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/layer_norm_2d_axis_one_test.onnx
+++ b/test/onnx/layer_norm_2d_axis_one_test.onnx
+	layer_norm_2d_axis_one_test:
+4
+x
+scale
+biasy"LayerNormalization*
+axislayer_norm_2d_axis_one_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/layer_norm_2d_axis_zero_test.onnx
+++ b/test/onnx/layer_norm_2d_axis_zero_test.onnx
--- a/test/onnx/layer_norm_3d_half_test.onnx
+++ b/test/onnx/layer_norm_3d_half_test.onnx
+layer_norm_3d_half_test:
+=
+x
+scale
+biasy"LayerNormalization*
+axislayer_norm_3d_half_testZ
+x
+Z
+scale
+Z
+bias
+b
+y
+B
\ No newline at end of file