Unverified Commit bb06dbf5 authored by Brian Pickrell, committed by GitHub

Pooling op. calculation changes (#1823)

Changes the way the Pooling operation calculates results when there is padding. The old code clipped off any padding values before computing; for instance, if an average pooling window contained 0 1 2, where the 0 is padding, the result was 1.5 instead of 1.0. See Issue 1766
parent 8428a242
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -42,16 +42,43 @@ namespace op {
struct pooling
{
pooling_mode mode = {pooling_mode::average};
// Padding along each spatial input dimension
// Can be ndim or 2*ndim values, where ndim is the size of lengths
// ndim values means pad the same amount before and after each dimension
// 2*ndim values contains ndim pre-padding values followed by ndim post-padding values
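// Illustrative example: with lengths = {2, 2}, padding = {1, 1} pads one
// element before and after each spatial dimension, while padding = {0, 1, 2, 3}
// pads (0, 1) before and (2, 3) after the two dimensions.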
std::vector<std::size_t> padding = {0, 0};
// Stride to take from one placement of the pooling kernel to the next.
// This is distinct from the strides used by the shape class. Must have the
// same ndim as lengths.
std::vector<std::size_t> stride = {1, 1};
// Spatial dimensions of the pooling kernel or window,
// 2 smaller than the input tensor rank (NCHW layout)
std::vector<std::size_t> lengths = {1, 1};
// Dilations are not supported at this time.
// Ceiling mode is a flag affecting output size,
// or equivalently, the placements of the pooling kernel.
// When true, round the output size upwards, possibly
// including partial placements where the kernel extends beyond the edge
// of the input and even the padding. When false, round down so that all
// kernel placements fit, but some input values may be dropped.
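// Worked example (assumed sizes): input length 5, kernel 2, stride 2, no
// padding. Floor mode gives floor((5 - 2) / 2) + 1 = 2 placements and drops
// the last input value; ceil mode gives ceil((5 - 2) / 2) + 1 = 3, where the
// third placement is partial.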
bool ceil_mode = false;
int lp_order = 2;
// Global pooling with dynamic shape input
bool dyn_global = false;
// count_include_pad is an attribute of the ONNX pooling operator, not currently
// enabled here because MIOpen can't support it. We currently implement padding
// for average pooling by inserting a Padding operator during ONNX parsing. But
// to support dynamic shape inputs and count_include_pad together, this
// calculation would have to be done at runtime in MIOpen.
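// Example from Issue 1766: an average pooling window containing 0 1 2, where
// the 0 is padding, averages to (0 + 1 + 2) / 3 = 1.0 when padding is counted,
// and to (1 + 2) / 2 = 1.5 when it is clipped off.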
bool count_include_pad = false;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
@@ -68,11 +95,29 @@ struct pooling
void check_attribute_size() const
{
if(dyn_global)
return;
if((padding.size() != stride.size() and padding.size() != stride.size() * 2) or
stride.size() != lengths.size())
{
MIGRAPHX_THROW("POOLING: inconsistent attribute sizes");
}
if(std::any_of(lengths.begin(), lengths.end(), [&](auto i) { return (i == 0); }) or
std::any_of(stride.begin(), stride.end(), [&](auto i) { return (i == 0); }))
{
MIGRAPHX_THROW("POOLING: size 0 pooling kernel or stride");
}
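// Illustrative examples: lengths = {3, 3} requires a stride of 2 values and
// padding of 2 or 4 values; lengths = {0, 2} or stride = {0, 0} throws.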
// TODO: update lowering to run the reference
// code when oneDNN can't execute pooling on a CPU.
// oneDNN has a limitation on padding size for pooling; see
// https://oneapi-src.github.io/oneDNN/dev_guide_convolution.html#doxid-dev-guide-convolution
// padding = {2}; stride = {1}; lengths = {3} succeeds in oneDNN, but
// padding = {2}; stride = {1}; lengths = {2} fails.
// Also, the referenced documentation gives a max. dimension size of 14 for the
// kernel ("weights tensor") that MIGraphX doesn't enforce.
}
size_t kdims() const
@@ -112,7 +157,11 @@ struct pooling
const shape& input = inputs.at(0);
auto padding_size = padding.size();
size_t kdims = input.ndim() - 2;
if(input.ndim() < 3)
{
MIGRAPHX_THROW("POOLING: input must have 3 or more dimensions and be nonempty");
}
if(input.ndim() * 2 != padding_size + 4 and input.ndim() != padding_size + 2)
{
MIGRAPHX_THROW("POOLING: input and attribute size mismatch!");
}
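// e.g. a 4-D NCHW input passes with padding.size() of 2 (symmetric) or
// 4 (separate pre- and post-padding values).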
@@ -132,7 +181,7 @@ struct pooling
}
else
{
// does not compute optimals
auto min_spatial_dims = calc_spatial_dim_out(input.min_lens(), kdims);
auto max_spatial_dims = calc_spatial_dim_out(input.max_lens(), kdims);
for(size_t i = 0; i < kdims; ++i)
@@ -149,7 +198,7 @@ struct pooling
std::vector<std::size_t> output_lens(input_lens.begin(), input_lens.begin() + 2);
// Used when normalize_compute_shape() is called again at model eval time
// for an originally dynamic shape. Kernel shape is not used with dyn_global.
if(dyn_global)
{
for(size_t i = 0; i < kdims; ++i)
@@ -184,7 +233,7 @@ struct pooling
double operator()(double x, double y) const { return x + std::pow(std::abs(y), p); }
double final(double x, std::size_t) const { return (p == 0) ? 1 : std::pow(x, 1. / p); }
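// lpnorm pooling accumulates sum_i |x_i|^p over the window in operator(),
// then final() gives (sum_i |x_i|^p)^(1 / p); the p == 0 guard avoids a
// division by zero in the exponent.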
};
struct avg_pool
@@ -222,37 +271,82 @@ struct pooling
{
auto in_s = input.get_shape();
auto in_lens = in_s.lens();
// For each element of the output, i.e. for each placement of the pooling kernel...
par_for(output_shape.elements(), [&](auto i) {
auto idx_o = output_shape.multi(i);
auto n_dim = idx_o.size();
// starting offset of the pooling window
std::vector<int> win_start;
std::vector<std::size_t> win_size;
// For each spatial dimension, find the starting and ending index of the pooling kernel
for(std::size_t dim = 2; dim < n_dim; ++dim)
{
auto d_2 = dim - 2;
int start =
static_cast<int>(idx_o[dim] * stride[d_2]) - static_cast<int>(padding[d_2]);
int end;
// NOLINT
if(count_include_pad and ceil_mode and (mode != pooling_mode::max))
{
// TODO: this block can't execute until we enable count_include_pad.
// Even when using padding, in ceil_mode a window
// could extend beyond the end of both the input and the
// padding. Clip out-of-bounds indexes, but not padding.
// Check whether this kernel extends beyond the padding at the end of this dimension
end = std::min(start + kernel_dims[d_2],
in_lens[dim] + static_cast<int>(padding[d_2]));
}
else
{
// In non-ceiling mode, when
// count_include_pad is false, or for max pooling, clip off padding.
end = std::min(start + kernel_dims[d_2], in_lens[dim]);
start = std::max(start, 0);
}
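// Worked example (assumed values): in_lens[dim] = 4, padding[d_2] = 1,
// kernel_dims[d_2] = 3, stride[d_2] = 2. The placement idx_o[dim] = 2 gives
// start = 2 * 2 - 1 = 3; in the clipping branch, end = min(3 + 3, 4) = 4,
// a window of size 1.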
win_start.push_back(start);
if(end < start)
{
// This error can be caused by various invalid input combinations
MIGRAPHX_THROW("POOLING: invalid attributes");
}
win_size.push_back(end - start);
}
shape win_shape{output_shape.type(), win_size};
auto pool_size = win_shape.elements();
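// pool_size counts every location in the (possibly clipped) window; when the
// window keeps padding positions above, they are included in the count that
// final() uses, e.g. as the divisor for average pooling.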
double output_val = op.template init<Type>();
// for each element in the window...
shape_for_each(win_shape, [&](auto idx_w) {
// the coordinates of this element
auto idx = idx_o;
// Add the kernel location idx_w and the offset win_start, for each dimension.
// Negative results are cast to very large unsigned integers.
std::transform(idx_w.begin(),
idx_w.end(),
win_start.begin(),
idx.begin() + 2,
[](auto ii, auto jj) { return ii + jj; });
// Check if any coordinates are out of the input tensor's range
if(std::mismatch(idx.begin() + 2,
idx.end(),
in_lens.begin() + 2,
in_lens.end(),
std::less<>{}) == std::make_pair(idx.end(), in_lens.end()))
{
output_val = op(output_val, input[in_s.index(idx)]);
}
else
{
// This is a padding element. Padding locations
// don't contribute to the average or max pooling total, but can
// contribute to lpnorm pooling.
output_val = op(output_val, 0);
}
});
output[i] = Type(op.final(output_val, pool_size));
});
......
@@ -636,6 +636,76 @@ TEST_CASE(avgpool_dyn_test)
EXPECT(migraphx::verify_range(results_vector, gold));
}
TEST_CASE(avgpool_dyn_pad_test)
{
// pooling with dynamic input and padding, ceiling mode for output size
migraphx::program p;
auto* mm = p.get_main_module();
auto s = migraphx::shape{migraphx::shape::float_type, {{1, 4}, {1, 3}, {2, 4}, {2, 4}}};
auto x = mm->add_parameter("X", s);
mm->add_instruction(migraphx::make_op("pooling",
{{"mode", migraphx::op::pooling_mode::average},
{"lengths", {2, 2}},
{"padding", {1, 0}},
{"ceil_mode", true},
{"stride", {2, 2}}}),
x);
p.compile(migraphx::make_target("ref"));
std::vector<float> data{1, 2, 3, 4, 5, 6};
// * * *
// 1 2 3 padding will look like this
// 4 5 6 The * are used when tiling the kernel
// * * * but are ignored in averaging
migraphx::shape input_fixed_shape{migraphx::shape::float_type, {1, 1, 2, 3}};
migraphx::parameter_map params;
params["X"] = migraphx::argument(input_fixed_shape, data.data());
auto result = p.eval(params).back();
std::vector<float> results_vector(4);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
std::vector<float> gold{1.5, 3.0, 4.5, 6.0};
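// e.g. the first window covers the padding row and {1, 2}: (1 + 2) / 2 = 1.5;
// the last window covers only {6}: 6.0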
EXPECT(migraphx::verify_range(results_vector, gold));
}
TEST_CASE(avgpool_dyn_pad_ceil_test)
{
// pooling with dynamic input, padding, and ceil_mode
migraphx::program p;
auto* mm = p.get_main_module();
auto s = migraphx::shape{migraphx::shape::float_type, {{1, 4}, {1, 3}, {2, 4}, {2, 4}}};
auto x = mm->add_parameter("X", s);
mm->add_instruction(migraphx::make_op("pooling",
{{"mode", migraphx::op::pooling_mode::average},
{"lengths", {2, 3}},
{"padding", {1, 2}},
{"ceil_mode", true},
{"stride", {1, 1}}}),
x);
p.compile(migraphx::make_target("ref"));
std::vector<float> data{1, 2, 3, 4};
// * * * * * *
// * * 1 2 * * padded input will look like this
// * * 3 4 * * but the * are ignored in averaging
// * * * * * *
migraphx::shape input_fixed_shape{migraphx::shape::float_type, {1, 1, 2, 2}};
migraphx::parameter_map params;
params["X"] = migraphx::argument(input_fixed_shape, data.data());
auto result = p.eval(params).back();
std::vector<float> results_vector(12);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
// clang-format off
std::vector<float> gold{1.0, 1.5, 1.5, 2.0,
2.0, 2.5, 2.5, 3.0,
3.0, 3.5, 3.5, 4.0};
// clang-format on
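// e.g. the top-left window overlaps only input element {1}, so the output
// is 1.0; the next window overlaps {1, 2}, giving 1.5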
EXPECT(migraphx::verify_range(results_vector, gold));
}
TEST_CASE(avgpool_rank3_stride2_test)
{
// 1D case 2, stride 2
@@ -647,40 +717,25 @@ TEST_CASE(avgpool_rank3_stride2_test)
op.padding = {1};
op.stride = {2};
// clang-format off
std::vector<float> data{1.6321, -2.4186, 0.2239, -1.4232,
0.8158, 0.4103, -0.3149, -0.1361,
-0.3442, 2.007, 0.4331, 1.5295,
0.9965, 0.4766, 1.0942, -0.2915};
// clang-format on
auto l0 = mm->add_literal(migraphx::literal{s, data});
mm->add_instruction(op, l0);
p.compile(migraphx::make_target("ref"));
auto result = p.eval({}).back();
std::vector<float> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
// clang-format off
std::vector<float> gold{1.6321, -1.09735, -1.4232,
0.8158, 0.0477, -0.1361,
-0.3442, 1.22005, 1.5295,
0.9965, 0.7854, -0.2915};
// clang-format on
EXPECT(migraphx::verify_range(results_vector, gold));
}
@@ -4238,6 +4293,27 @@ TEST_CASE(lppool_l1_norm_test)
EXPECT(migraphx::verify_range(results_vector, gold));
}
// TODO: this tests compliance with a oneDNN rule and a feature that's commented out
// in pooling.hpp
// TEST_CASE(lppool_l1_norm_err_test)
// {
// // padding too large for kernel size
// migraphx::program p;
// auto* mm = p.get_main_module();
// auto s = migraphx::shape{migraphx::shape::float_type, {1, 2, 5}};
// auto op = migraphx::op::pooling{migraphx::op::pooling_mode::lpnorm};
// op.lengths = {3};
// op.padding = {2};
// op.stride = {1};
// op.lp_order = 1;
// std::vector<float> data{0.3, 0.2, 0.4, 0.1, 0.8, 0.5, 0.9, 0.1, 0.1, 0.7};
// auto l0 = mm->add_literal(migraphx::literal{s, data});
// EXPECT(test::throws([&] {
// mm->add_instruction(op, l0);
// }));
// }
TEST_CASE(lppool_l2_norm_test)
{
// L2 norm test
@@ -4422,6 +4498,35 @@ TEST_CASE(maxpool_test)
EXPECT(migraphx::verify_range(results_vector, c));
}
TEST_CASE(maxpool_pad_test)
{
migraphx::program p;
auto* mm = p.get_main_module();
std::vector<float> a = {-6, -5, -4, -3, -5, -1, 0, 1, 2, 3, 4, 5};
std::vector<float> c = {-4, -3, -4, -1, 2, 3, 4, 5};
migraphx::shape a_shape{migraphx::shape::float_type, {1, 2, 3, 2}};
auto al = mm->add_literal(migraphx::literal{a_shape, a});
mm->add_instruction(migraphx::make_op("pooling",
{{"mode", migraphx::op::pooling_mode::max},
{"padding", {1, 1}},
{"stride", {2, 2}},
{"lengths", {3, 2}}}),
al);
// * * * * * * * *
// * -6 -5 * * 0 1 *
// * -4 -3 * padding will look like this * 2 3 *
// * -5 -1 * and this * 4 5 *
// * * * * The * values are actually -INF * * * *
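// e.g. for the first channel, the window at output (0, 0) spans rows -1..1
// and columns -1..0; its in-range elements are {-6, -4}, so the max is -4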
p.compile(migraphx::make_target("ref"));
auto result = p.eval({}).back();
std::vector<float> results_vector(8);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, c));
}
TEST_CASE(maxpool_rank3_test0)
{
// 1D case 1, input is 3D
@@ -4482,9 +4587,12 @@ TEST_CASE(maxpool_rank3_ceil_test)
op.stride = {2};
op.ceil_mode = true;
// clang-format off
std::vector<float> data{0.4975, -0.1226, -0.0405, -0.2861, -0.1227,
-0.6186, -0.9618, 0.6022, -0.1912, 1.1925,
0.5493, 0.1692, -0.8039, -1.0281, 0.9907,
0.477, 1.5001, -1.1603, -1.361, 1.2556};
// clang-format on
auto l0 = mm->add_literal(migraphx::literal{s, data});
mm->add_instruction(op, l0);
p.compile(migraphx::make_target("ref"));
@@ -4492,18 +4600,12 @@ TEST_CASE(maxpool_rank3_ceil_test)
std::vector<float> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
// clang-format off
std::vector<float> gold{0.4975, -0.0405, -0.1227, -0.6186,
0.6022, 1.1925, 0.5493, -0.8039,
0.9907, 1.5001, -1.1603, 1.2556};
// clang-format on
EXPECT(migraphx::verify_range(results_vector, gold));
}
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/op/pooling.hpp>
struct test_avg_pooling_pad : verify_program<test_avg_pooling_pad>
{
migraphx::program create_program() const
{
// pooling test with nonzero padding
migraphx::program p;
auto* mm = p.get_main_module();
auto input =
mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {1, 3, 7}});
auto op = migraphx::op::pooling{migraphx::op::pooling_mode::average, {2}, {1}, {3}};
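// Aggregate initialization fills members in declaration order:
// mode = average, padding = {2}, stride = {1}, lengths = {3}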
mm->add_instruction(op, input);
return p;
}
};