nonmaxsuppression.hpp 13 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#ifndef MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP
#define MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP

#include <cmath>
#include <queue>
#include <cstdint>
#include <iterator>
#include <migraphx/config.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/float_equal.hpp>
#include <migraphx/algorithm.hpp>
#include <migraphx/tensor_view.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/output_iterator.hpp>
Charlie Lin's avatar
Charlie Lin committed
39
#include <migraphx/argument.hpp>
40
41
42
43
44
45
46
47

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct nonmaxsuppression
{
    bool center_point_box = false;
Charlie Lin's avatar
Charlie Lin committed
48
    bool use_dyn_output   = false;
49
50
51
52

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
Charlie Lin's avatar
Charlie Lin committed
53
54
        return pack(f(self.center_point_box, "center_point_box"),
                    f(self.use_dyn_output, "use_dyn_output"));
55
56
57
58
59
60
61
    }

    std::string name() const { return "nonmaxsuppression"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        // requires at least 2 inputs
Charlie Lin's avatar
Charlie Lin committed
62
63
64
65
        check_shapes{{inputs.at(0), inputs.at(1)}, *this, true}.only_dims(3).same_ndims();
        auto boxes_max_lens = inputs.at(0).max_lens();
        // num batches * num boxes
        const auto max_num_boxes = boxes_max_lens.at(0) * boxes_max_lens.at(1);
66

Charlie Lin's avatar
Charlie Lin committed
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
        auto fixed_shape_error_check = [&]() {
            auto lens = inputs.front().lens();
            if(lens[1] != inputs.at(1).lens()[2])
            {
                MIGRAPHX_THROW(
                    "NonMaxSuppression: spatial dimension mismatch between boxes and scores input");
            }
            if(lens[0] != inputs.at(1).lens()[0])
            {
                MIGRAPHX_THROW(
                    "NonMaxSuppression: number of batches mismatch between boxes and scores input");
            }
        };

        if(use_dyn_output)
82
        {
Charlie Lin's avatar
Charlie Lin committed
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
            if(inputs.at(0).dynamic())
            {
                // both boxes and scores should be dynamic
                // check dynamic dimensions are consistent
                const auto boxes_dims  = inputs.at(0).dyn_dims();
                const auto scores_dims = inputs.at(1).dyn_dims();
                if(boxes_dims.at(1) != scores_dims.at(2))
                {
                    MIGRAPHX_THROW("NonMaxSuppression: dynamic spatial dimension mismatch between "
                                   "boxes and scores input");
                }
                if(boxes_dims.at(0) != scores_dims.at(0))
                {
                    MIGRAPHX_THROW("NonMaxSuppression: dynamic number of batches mismatch between "
                                   "boxes and scores input");
                }
            }
            else if(inputs.at(1).dynamic())
            {
                // scores has dynamic shape, boxes fixed shape
                // check that it is only a dynamic number of classes
                const auto scores_dims = inputs.at(1).dyn_dims();
                const auto boxes_lens  = inputs.at(0).lens();
                if(not scores_dims.at(0).is_fixed() or scores_dims.at(0).max != boxes_lens.at(0))
                {
                    MIGRAPHX_THROW("NonMaxSuppression: scores dynamic num_classes; num_batches not "
                                   "fixed or mismatched");
                }
                if(not scores_dims.at(2).is_fixed() or scores_dims.at(2).max != boxes_lens.at(1))
                {
                    MIGRAPHX_THROW("NonMaxSuppression: scores dynamic num_classes; "
                                   "spatial_dimension not fixed or mismatches");
                }
            }
            else
            {
                fixed_shape_error_check();
            }
            std::vector<shape::dynamic_dimension> out_lens = {};
            out_lens.push_back({0, max_num_boxes, 0});
            out_lens.push_back({3, 3, 0});
            return {shape::int64_type, out_lens};
125
        }
Charlie Lin's avatar
Charlie Lin committed
126
        else
127
        {
Charlie Lin's avatar
Charlie Lin committed
128
129
130
131
132
133
134
135
            if(inputs.at(0).dynamic() or inputs.at(1).dynamic())
            {
                MIGRAPHX_THROW(
                    "NonMaxSuppression: dynamic input shape with use_dyn_output set to false");
            }
            fixed_shape_error_check();
            std::vector<std::size_t> out_lens = {max_num_boxes, 3};
            return {shape::int64_type, out_lens};
136
137
138
139
140
        }
    }

    struct box
    {
141
142
        std::array<double, 2> x;
        std::array<double, 2> y;
143
144
145
146
147
148
149

        void sort()
        {
            std::sort(x.begin(), x.end());
            std::sort(y.begin(), y.end());
        }

150
        std::array<double, 2>& operator[](std::size_t i) { return i == 0 ? x : y; }
151

152
        double area() const
153
154
155
156
157
158
159
160
        {
            assert(std::is_sorted(x.begin(), x.end()));
            assert(std::is_sorted(y.begin(), y.end()));
            return (x[1] - x[0]) * (y[1] - y[0]);
        }
    };

    template <class T>
161
    box batch_box(T boxes, std::size_t box_idx) const
162
163
    {
        box result{};
164
        auto start = boxes + 4 * box_idx;
165
166
        if(center_point_box)
        {
167
168
169
170
171
172
            double half_width  = start[2] / 2.0;
            double half_height = start[3] / 2.0;
            double x_center    = start[0];
            double y_center    = start[1];
            result.x           = {x_center - half_width, x_center + half_width};
            result.y           = {y_center - half_height, y_center + half_height};
173
174
175
        }
        else
        {
176
177
            result.x = {static_cast<double>(start[1]), static_cast<double>(start[3])};
            result.y = {static_cast<double>(start[0]), static_cast<double>(start[2])};
178
179
180
181
182
        }

        return result;
    }

183
    inline bool suppress_by_iou(box b1, box b2, double iou_threshold) const
184
185
186
187
188
189
190
191
192
193
194
    {
        b1.sort();
        b2.sort();

        box intersection{};
        for(auto i : range(2))
        {
            intersection[i][0] = std::max(b1[i][0], b2[i][0]);
            intersection[i][1] = std::min(b1[i][1], b2[i][1]);
        }

195
        std::vector<std::array<double, 2>> bbox = {intersection.x, intersection.y};
196
197
198
199
200
201
202
        if(std::any_of(bbox.begin(), bbox.end(), [](auto bx) {
               return not std::is_sorted(bx.begin(), bx.end());
           }))
        {
            return false;
        }

203
204
205
206
        const double area1             = b1.area();
        const double area2             = b2.area();
        const double intersection_area = intersection.area();
        const double union_area        = area1 + area2 - intersection_area;
207
208
209
210
211
212

        if(area1 <= .0f or area2 <= .0f or union_area <= .0f)
        {
            return false;
        }

213
        const double intersection_over_union = intersection_area / union_area;
214
215
216
217

        return intersection_over_union > iou_threshold;
    }

218
219
220
221
    // filter boxes below score_threshold
    template <class T>
    std::priority_queue<std::pair<double, int64_t>>
    filter_boxes_by_score(T scores_start, std::size_t num_boxes, double score_threshold) const
222
    {
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
        std::priority_queue<std::pair<double, int64_t>> boxes_heap;
        auto insert_to_boxes_heap =
            make_function_output_iterator([&](const auto& x) { boxes_heap.push(x); });
        int64_t box_idx = 0;
        transform_if(
            scores_start,
            scores_start + num_boxes,
            insert_to_boxes_heap,
            [&](auto sc) {
                box_idx++;
                return sc >= score_threshold;
            },
            [&](auto sc) { return std::make_pair(sc, box_idx - 1); });
        return boxes_heap;
    }
238

239
    template <class Output, class Boxes, class Scores>
Charlie Lin's avatar
Charlie Lin committed
240
241
242
243
244
245
246
    std::size_t compute_nms(Output output,
                            Boxes boxes,
                            Scores scores,
                            const shape& max_output_shape,
                            std::size_t max_output_boxes_per_class,
                            double iou_threshold,
                            double score_threshold) const
247
248
249
250
251
252
253
254
    {
        std::fill(output.begin(), output.end(), 0);
        const auto& lens       = scores.get_shape().lens();
        const auto num_batches = lens[0];
        const auto num_classes = lens[1];
        const auto num_boxes   = lens[2];
        // boxes of a class with NMS applied [score, index]
        std::vector<std::pair<double, int64_t>> selected_boxes_inside_class;
255
        std::vector<int64_t> selected_indices;
Charlie Lin's avatar
Charlie Lin committed
256
        selected_boxes_inside_class.reserve(max_output_shape.elements());
257
258
        // iterate over batches and classes
        shape comp_s{shape::double_type, {num_batches, num_classes}};
259
        shape_for_each(comp_s, [&](auto idx) {
260
261
262
263
264
265
266
            auto batch_idx = idx[0];
            auto class_idx = idx[1];
            // index offset for this class
            auto scores_start = scores.begin() + (batch_idx * num_classes + class_idx) * num_boxes;
            // iterator to first value of this batch
            auto batch_boxes_start = boxes.begin() + batch_idx * num_boxes * 4;
            auto boxes_heap = filter_boxes_by_score(scores_start, num_boxes, score_threshold);
267
268
            selected_boxes_inside_class.clear();
            // Get the next box with top score, filter by iou_threshold
269
            while(not boxes_heap.empty() &&
270
271
                  selected_boxes_inside_class.size() < max_output_boxes_per_class)
            {
272
273
274
275
276
277
278
279
280
281
282
283
                // Check with existing selected boxes for this class, remove box if it
                // exceeds the IOU (Intersection Over Union) threshold
                const auto next_top_score = boxes_heap.top();
                bool not_selected =
                    std::any_of(selected_boxes_inside_class.begin(),
                                selected_boxes_inside_class.end(),
                                [&](auto selected_index) {
                                    return this->suppress_by_iou(
                                        batch_box(batch_boxes_start, next_top_score.second),
                                        batch_box(batch_boxes_start, selected_index.second),
                                        iou_threshold);
                                });
284
285
286
287

                if(not not_selected)
                {
                    selected_boxes_inside_class.push_back(next_top_score);
288
289
                    selected_indices.push_back(batch_idx);
                    selected_indices.push_back(class_idx);
290
291
                    selected_indices.push_back(next_top_score.second);
                }
292
                boxes_heap.pop();
293
294
            }
        });
295
        std::copy(selected_indices.begin(), selected_indices.end(), output.begin());
Charlie Lin's avatar
Charlie Lin committed
296
        return selected_indices.size() / 3;
297
298
299
300
    }

    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
Charlie Lin's avatar
Charlie Lin committed
301
302
303
        // make buffer of maximum size
        shape max_output_shape = {output_shape.type(), output_shape.max_lens()};
        argument result{max_output_shape};
304

305
306
307
308
309
310
        std::size_t max_output_boxes_per_class =
            (args.size() > 2) ? (args.at(2).at<std::size_t>()) : 0;
        if(max_output_boxes_per_class == 0)
        {
            return result;
        }
Charlie Lin's avatar
Charlie Lin committed
311
312
313
        double iou_threshold     = (args.size() > 3) ? (args.at(3).at<double>()) : 0.0f;
        double score_threshold   = (args.size() > 4) ? (args.at(4).at<double>()) : 0.0f;
        std::size_t num_selected = 0;
314
315
316

        result.visit([&](auto output) {
            visit_all(args[0], args[1])([&](auto boxes, auto scores) {
Charlie Lin's avatar
Charlie Lin committed
317
318
319
320
321
322
323
                num_selected = compute_nms(output,
                                           boxes,
                                           scores,
                                           max_output_shape,
                                           max_output_boxes_per_class,
                                           iou_threshold,
                                           score_threshold);
324
            });
325
        });
Charlie Lin's avatar
Charlie Lin committed
326
327
328
329
330
331
332
333
        if(use_dyn_output)
        {
            return result.reshape({output_shape.type(), {num_selected, 3}});
        }
        else
        {
            return result;
        }
334
335
336
337
338
339
340
341
    }
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif