"examples/pytorch/gatv2/train.py" did not exist on "788d8dd4ade144ee0f70cc8d03c07d782630e8d4"
boxes.py 12.4 KB
Newer Older
Aditya Oke's avatar
Aditya Oke committed
1
from typing import Tuple
2
3

import torch
4
import torchvision
5
from torch import Tensor
6
from torchvision.extension import _assert_has_ops
7

8
from ..utils import _log_api_usage_once
9
10
from ._box_convert import _box_cxcywh_to_xyxy, _box_xyxy_to_cxcywh, _box_xywh_to_xyxy, _box_xyxy_to_xywh

11

12
def nms(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor:
    """
    Performs non-maximum suppression (NMS) on the boxes according
    to their intersection-over-union (IoU).

    NMS iteratively removes lower scoring boxes which have an
    IoU greater than ``iou_threshold`` with another (higher scoring)
    box.

    If multiple boxes have the exact same score and satisfy the IoU
    criterion with respect to a reference box, the selected box is
    not guaranteed to be the same between CPU and GPU. This is similar
    to the behavior of argsort in PyTorch when repeated values are present.

    Args:
        boxes (Tensor[N, 4]): boxes to perform NMS on. They
            are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and
            ``0 <= y1 < y2``.
        scores (Tensor[N]): scores for each one of the boxes
        iou_threshold (float): discards all overlapping boxes with IoU > iou_threshold

    Returns:
        Tensor: int64 tensor with the indices of the elements that have been kept
        by NMS, sorted in decreasing order of scores
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(nms)
    # The actual suppression is implemented as a compiled custom op; fail fast
    # with a clear error if the torchvision C++ extension is unavailable.
    _assert_has_ops()
    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
41
42


43
44
45
46
47
48
def batched_nms(
    boxes: Tensor,
    scores: Tensor,
    idxs: Tensor,
    iou_threshold: float,
) -> Tensor:
    """
    Performs non-maximum suppression in a batched fashion.

    Each index value corresponds to a category, and NMS
    will not be applied between elements of different categories.

    Args:
        boxes (Tensor[N, 4]): boxes where NMS will be performed. They
            are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and
            ``0 <= y1 < y2``.
        scores (Tensor[N]): scores for each one of the boxes
        idxs (Tensor[N]): indices of the categories for each one of the boxes.
        iou_threshold (float): discards all overlapping boxes with IoU > iou_threshold

    Returns:
        Tensor: int64 tensor with the indices of the elements that have been kept by NMS, sorted
        in decreasing order of scores
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(batched_nms)
    # Benchmarks that drove the following thresholds are at
    # https://github.com/pytorch/vision/issues/1311#issuecomment-781329339
    # Large workloads are faster with the per-class loop; small ones with the
    # coordinate-offset trick (which is also the only tracing-safe path).
    threshold = 4000 if boxes.device.type == "cpu" else 20000
    prefer_vanilla = boxes.numel() > threshold and not torchvision._is_tracing()
    if prefer_vanilla:
        return _batched_nms_vanilla(boxes, scores, idxs, iou_threshold)
    return _batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold)


@torch.jit._script_if_tracing
def _batched_nms_coordinate_trick(
    boxes: Tensor,
    scores: Tensor,
    idxs: Tensor,
    iou_threshold: float,
) -> Tensor:
    # Strategy: perform NMS independently per class with a single nms() call
    # by translating every box by an offset that depends only on its class
    # index. The offset exceeds the largest coordinate, so boxes of different
    # classes land in disjoint regions and can never suppress each other.
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)
    largest_coord = boxes.max()
    per_class_shift = idxs.to(boxes) * (largest_coord + torch.tensor(1).to(boxes))
    shifted_boxes = boxes + per_class_shift[:, None]
    return nms(shifted_boxes, scores, iou_threshold)


@torch.jit._script_if_tracing
def _batched_nms_vanilla(
    boxes: Tensor,
    scores: Tensor,
    idxs: Tensor,
    iou_threshold: float,
) -> Tensor:
    # Based on the Detectron2 implementation: run nms() separately on every
    # class and merge the survivors through a boolean mask.
    keep_mask = torch.zeros_like(scores, dtype=torch.bool)
    for class_id in torch.unique(idxs):
        members = torch.where(idxs == class_id)[0]
        kept_in_class = nms(boxes[members], scores[members], iou_threshold)
        keep_mask[members[kept_in_class]] = True
    survivors = torch.where(keep_mask)[0]
    # Order the merged result by descending score, matching nms() output order.
    order = scores[survivors].sort(descending=True)[1]
    return survivors[order]
112
113


114
def remove_small_boxes(boxes: Tensor, min_size: float) -> Tensor:
    """
    Remove boxes which contains at least one side smaller than min_size.

    Args:
        boxes (Tensor[N, 4]): boxes in ``(x1, y1, x2, y2)`` format
            with ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
        min_size (float): minimum size

    Returns:
        Tensor[K]: indices of the boxes that have both sides
        larger than min_size
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(remove_small_boxes)
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    large_enough = (widths >= min_size) & (heights >= min_size)
    return torch.where(large_enough)[0]


135
def clip_boxes_to_image(boxes: Tensor, size: Tuple[int, int]) -> Tensor:
    """
    Clip boxes so that they lie inside an image of size `size`.

    Args:
        boxes (Tensor[N, 4]): boxes in ``(x1, y1, x2, y2)`` format
            with ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
        size (Tuple[height, width]): size of the image

    Returns:
        Tensor[N, 4]: clipped boxes
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(clip_boxes_to_image)
    ndim = boxes.dim()
    # Even-indexed coordinates are x's (x1, x2); odd-indexed are y's (y1, y2).
    xs = boxes[..., 0::2]
    ys = boxes[..., 1::2]
    height, width = size

    if torchvision._is_tracing():
        # Under ONNX tracing, clamp through tensor min/max so the bounds are
        # captured as graph values instead of baked-in Python scalars.
        lower = torch.tensor(0, dtype=boxes.dtype, device=boxes.device)
        xs = torch.max(xs, lower)
        xs = torch.min(xs, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))
        ys = torch.max(ys, lower)
        ys = torch.min(ys, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))
    else:
        xs = xs.clamp(min=0, max=width)
        ys = ys.clamp(min=0, max=height)

    clipped = torch.stack((xs, ys), dim=ndim)
    return clipped.reshape(boxes.shape)


167
168
169
170
171
172
def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor:
    """
    Converts boxes from given in_fmt to out_fmt.
    Supported in_fmt and out_fmt are:

    'xyxy': boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right.
    This is the format that torchvision utilities expect.

    'xywh' : boxes are represented via corner, width and height, x1, y2 being top left, w, h being width and height.

    'cxcywh' : boxes are represented via centre, width and height, cx, cy being center of box, w, h
    being width and height.

    Args:
        boxes (Tensor[N, 4]): boxes which will be converted.
        in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh'].
        out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh']

    Returns:
        Tensor[N, 4]: Boxes into converted format.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(box_convert)
    allowed_fmts = ("xyxy", "xywh", "cxcywh")
    if in_fmt not in allowed_fmts or out_fmt not in allowed_fmts:
        raise ValueError("Unsupported Bounding Box Conversions for given in_fmt and out_fmt")

    if in_fmt == out_fmt:
        return boxes.clone()

    # Normalize the input through 'xyxy' when neither endpoint is 'xyxy',
    # so every conversion below is a single hop to or from 'xyxy'.
    if in_fmt != "xyxy" and out_fmt != "xyxy":
        if in_fmt == "xywh":
            boxes = _box_xywh_to_xyxy(boxes)
        elif in_fmt == "cxcywh":
            boxes = _box_cxcywh_to_xyxy(boxes)
        in_fmt = "xyxy"

    if in_fmt == "xyxy":
        # One hop out of the canonical format.
        if out_fmt == "xywh":
            boxes = _box_xyxy_to_xywh(boxes)
        elif out_fmt == "cxcywh":
            boxes = _box_xyxy_to_cxcywh(boxes)
    elif out_fmt == "xyxy":
        # One hop into the canonical format.
        if in_fmt == "xywh":
            boxes = _box_xywh_to_xyxy(boxes)
        elif in_fmt == "cxcywh":
            boxes = _box_cxcywh_to_xyxy(boxes)
    return boxes
216
217


218
219
220
221
222
223
224
225
def _upcast(t: Tensor) -> Tensor:
    # Protects from numerical overflows in multiplications by upcasting to the equivalent higher type
    if t.is_floating_point():
        return t if t.dtype in (torch.float32, torch.float64) else t.float()
    else:
        return t if t.dtype in (torch.int32, torch.int64) else t.int()


226
def box_area(boxes: Tensor) -> Tensor:
    """
    Computes the area of a set of bounding boxes, which are specified by their
    (x1, y1, x2, y2) coordinates.

    Args:
        boxes (Tensor[N, 4]): boxes for which the area will be computed. They
            are expected to be in (x1, y1, x2, y2) format with
            ``0 <= x1 < x2`` and ``0 <= y1 < y2``.

    Returns:
        Tensor[N]: the area for each box
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(box_area)
    # Upcast first so the width*height product cannot overflow narrow dtypes.
    boxes = _upcast(boxes)
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights


# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
# with slight modifications
def _box_inter_union(boxes1: Tensor, boxes2: Tensor) -> Tuple[Tensor, Tensor]:
    # Pairwise intersection and union areas between every box in boxes1 and
    # every box in boxes2, both [N,M].
    area1 = box_area(boxes1)
    area2 = box_area(boxes2)

    # Corners of the intersection rectangle for each (i, j) pair; broadcasting
    # over the inserted axis yields [N,M,2] tensors.
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # Clamp at zero: non-overlapping pairs produce negative extents.
    extents = _upcast(bottom_right - top_left).clamp(min=0)  # [N,M,2]
    inter = extents[:, :, 0] * extents[:, :, 1]  # [N,M]
    union = area1[:, None] + area2 - inter
    return inter, union


262
def box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    """
    Return intersection-over-union (Jaccard index) between two sets of boxes.

    Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with
    ``0 <= x1 < x2`` and ``0 <= y1 < y2``.

    Args:
        boxes1 (Tensor[N, 4]): first set of boxes
        boxes2 (Tensor[M, 4]): second set of boxes

    Returns:
        Tensor[N, M]: the NxM matrix containing the pairwise IoU values for every element in boxes1 and boxes2
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(box_iou)
    inter, union = _box_inter_union(boxes1, boxes2)
    return inter / union
Aditya Oke's avatar
Aditya Oke committed
281
282
283
284
285


# Implementation adapted from https://github.com/facebookresearch/detr/blob/master/util/box_ops.py
def generalized_box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    """
    Return generalized intersection-over-union (Jaccard index) between two sets of boxes.

    Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with
    ``0 <= x1 < x2`` and ``0 <= y1 < y2``.

    Args:
        boxes1 (Tensor[N, 4]): first set of boxes
        boxes2 (Tensor[M, 4]): second set of boxes

    Returns:
        Tensor[N, M]: the NxM matrix containing the pairwise generalized IoU values
        for every element in boxes1 and boxes2
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(generalized_box_iou)

    inter, union = _box_inter_union(boxes1, boxes2)
    iou = inter / union

    # Corners of the smallest box enclosing each (i, j) pair.
    enclose_tl = torch.min(boxes1[:, None, :2], boxes2[:, :2])
    enclose_br = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
    enclose_wh = _upcast(enclose_br - enclose_tl).clamp(min=0)  # [N,M,2]
    enclose_area = enclose_wh[:, :, 0] * enclose_wh[:, :, 1]

    # GIoU = IoU - (enclosing area not covered by the union) / enclosing area.
    return iou - (enclose_area - union) / enclose_area
312
313
314
315


def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor:
    """
    Compute the bounding boxes around the provided masks.

    Returns a [N, 4] tensor containing bounding boxes. The boxes are in ``(x1, y1, x2, y2)`` format with
    ``0 <= x1 < x2`` and ``0 <= y1 < y2``.

    Args:
        masks (Tensor[N, H, W]): masks to transform where N is the number of masks
            and (H, W) are the spatial dimensions.

    Returns:
        Tensor[N, 4]: bounding boxes
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(masks_to_boxes)
    if masks.numel() == 0:
        return torch.zeros((0, 4), device=masks.device, dtype=torch.float)

    num_masks = masks.shape[0]
    bounding_boxes = torch.zeros((num_masks, 4), device=masks.device, dtype=torch.float)

    for i, mask in enumerate(masks):
        # Row/column indices of every nonzero pixel in this mask.
        ys, xs = torch.where(mask != 0)
        bounding_boxes[i, 0] = torch.min(xs)
        bounding_boxes[i, 1] = torch.min(ys)
        bounding_boxes[i, 2] = torch.max(xs)
        bounding_boxes[i, 3] = torch.max(ys)

    return bounding_boxes