box_ops.py 10.9 KB
Newer Older
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
1
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Yeqing Li's avatar
Yeqing Li committed
14

Abdullah Rashwan's avatar
Abdullah Rashwan committed
15
"""Yolo box ops."""
16
import math
17
import tensorflow as tf
Abdullah Rashwan's avatar
Abdullah Rashwan committed
18
from official.projects.yolo.ops import math_ops
19

20
21

def yxyx_to_xcycwh(box: tf.Tensor):
  """Re-encodes corner-format boxes as center/size boxes.

  Args:
    box: a `Tensor` whose last dimension has size 4, holding boxes laid out as
      (ymin, xmin, ymax, xmax).

  Returns:
    A `Tensor` with the same shape as `box`, laid out as
    (x_center, y_center, width, height).
  """
  with tf.name_scope('yxyx_to_xcycwh'):
    # Each slice keeps a trailing dimension of size 1 so the pieces can be
    # concatenated back along the last axis.
    top, left, bottom, right = tf.split(box, 4, axis=-1)
    center_x = (right + left) / 2
    center_y = (bottom + top) / 2
    box_w = right - left
    box_h = bottom - top
    return tf.concat([center_x, center_y, box_w, box_h], axis=-1)


Vishnu Banna's avatar
Vishnu Banna committed
41
def xcycwh_to_yxyx(box: tf.Tensor):
  """Re-encodes center/size boxes as corner-format boxes.

  Args:
    box: a `Tensor` whose last dimension has size 4, holding boxes laid out as
      (x_center, y_center, width, height).

  Returns:
    A `Tensor` with the same shape as `box`, laid out as
    (ymin, xmin, ymax, xmax).
  """
  with tf.name_scope('xcycwh_to_yxyx'):
    centers, sizes = tf.split(box, 2, axis=-1)
    half_sizes = sizes / 2
    lower = centers - half_sizes
    upper = centers + half_sizes
    # The center/size layout is x-first, while the output layout is y-first,
    # so the two coordinates are separated and re-ordered below.
    x_lo, y_lo = tf.split(lower, 2, axis=-1)
    x_hi, y_hi = tf.split(upper, 2, axis=-1)
    return tf.concat([y_lo, x_lo, y_hi, x_hi], axis=-1)


Abdullah Rashwan's avatar
Abdullah Rashwan committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def intersect_and_union(box1, box2, yxyx=False):
  """Computes the intersection and union areas of box1 and box2.

  Args:
    box1: a `Tensor` whose last dimension has size 4 holding box coordinates.
    box2: a `Tensor` whose last dimension has size 4 holding box coordinates.
    yxyx: a `bool`. When True the inputs are read as
      (y_min, x_min, y_max, x_max); when False they are read as
      (x_center, y_center, width, height) and converted internally.

  Returns:
    intersection: a `Tensor` of overlap areas; the last (coordinate) dimension
      of the inputs is reduced away.
    union: a `Tensor` of union areas, computed as
      area(box1) + area(box2) - intersection.
  """
  if not yxyx:
    # In center/size form the area comes straight from (width, height), so it
    # is taken before the boxes are converted to corners.
    _, size1 = tf.split(box1, 2, axis=-1)
    _, size2 = tf.split(box2, 2, axis=-1)
    area1 = tf.reduce_prod(size1, axis=-1)
    area2 = tf.reduce_prod(size2, axis=-1)
    box1 = xcycwh_to_yxyx(box1)
    box2 = xcycwh_to_yxyx(box2)

  lo1, hi1 = tf.split(box1, 2, axis=-1)
  lo2, hi2 = tf.split(box2, 2, axis=-1)

  overlap_lo = tf.math.maximum(lo1, lo2)
  overlap_hi = tf.math.minimum(hi1, hi2)
  # Disjoint boxes give a negative extent; clamp it so the area becomes zero.
  overlap_extent = tf.math.maximum(overlap_hi - overlap_lo, 0.0)
  intersection = tf.reduce_prod(overlap_extent, axis=-1)

  if yxyx:
    # Corner-format inputs: derive each area from the corner differences.
    area1 = tf.reduce_prod(hi1 - lo1, axis=-1)
    area2 = tf.reduce_prod(hi2 - lo2, axis=-1)

  return intersection, area1 + area2 - intersection
94

95

Vishnu Banna's avatar
Vishnu Banna committed
96
def smallest_encompassing_box(box1, box2, yxyx=False, clip=False):
  """Finds the smallest box that contains both box1 and box2.

  Args:
    box1: a `Tensor` whose last dimension has size 4 holding box coordinates.
    box2: a `Tensor` whose last dimension has size 4 holding box coordinates.
    yxyx: a `bool`. When True the inputs are read as
      (y_min, x_min, y_max, x_max); when False they are read as
      (x_center, y_center, width, height) and converted internally.
    clip: a `bool`. When True, any enclosing box whose area is not positive is
      replaced with zeros in the third return value only.

  Returns:
    A tuple `(bcmi, bcma, box_c)`:
    bcmi: a `Tensor` with last dimension 2, the element-wise minimum of the
      two boxes' (y_min, x_min) corners.
    bcma: a `Tensor` with last dimension 2, the element-wise maximum of the
      two boxes' (y_max, x_max) corners.
    box_c: a `Tensor` with last dimension 4 holding the enclosing box in the
      same format as the inputs: (y_min, x_min, y_max, x_max) if `yxyx` is
      True, otherwise (x_center, y_center, width, height).
  """
  if not yxyx:
    box1 = xcycwh_to_yxyx(box1)
    box2 = xcycwh_to_yxyx(box2)

  lo1, hi1 = tf.split(box1, 2, axis=-1)
  lo2, hi2 = tf.split(box2, 2, axis=-1)

  hull_min = tf.math.minimum(lo1, lo2)
  hull_max = tf.math.maximum(hi1, hi2)

  hull = tf.concat([hull_min, hull_max], axis=-1)
  if not yxyx:
    # Hand the box back in the caller's center/size format.
    hull = yxyx_to_xcycwh(hull)

  if clip:
    # keepdims lets the area mask broadcast across the 4 coordinates.
    hull_area = tf.reduce_prod(hull_max - hull_min, keepdims=True, axis=-1)
    hull = tf.where(hull_area <= 0.0, tf.zeros_like(hull), hull)
  return hull_min, hull_max, hull
131
132
133


def compute_iou(box1, box2, yxyx=False):
  """Computes the intersection over union (IoU) of box1 and box2.

  Args:
    box1: a `Tensor` whose last dimension has size 4 holding box coordinates.
    box2: a `Tensor` whose last dimension has size 4 holding box coordinates.
    yxyx: a `bool`. When True the inputs are read as
      (y_min, x_min, y_max, x_max); when False they are read as
      (x_center, y_center, width, height).

  Returns:
    iou: a `Tensor` holding intersection / union for each pair of boxes.
  """
  with tf.name_scope('iou'):
    overlap, combined = intersect_and_union(box1, box2, yxyx=yxyx)
    # NOTE(review): divide_no_nan presumably yields 0 where the union is 0 --
    # confirm against official.projects.yolo.ops.math_ops.
    return math_ops.divide_no_nan(overlap, combined)


Vishnu Banna's avatar
Vishnu Banna committed
153
def compute_giou(box1, box2, yxyx=False):
  """Computes the generalized intersection over union of box1 and box2.

  The generalized IoU is the plain IoU minus the fraction of the smallest
  enclosing box that is not covered by the union of the two boxes.

  Args:
    box1: a `Tensor` whose last dimension has size 4 holding box coordinates.
    box2: a `Tensor` whose last dimension has size 4 holding box coordinates.
    yxyx: a `bool`. When True the inputs are read as
      (y_min, x_min, y_max, x_max); when False they are read as
      (x_center, y_center, width, height).

  Returns:
    A tuple `(iou, giou)`:
    iou: a `Tensor` holding the plain intersection over union.
    giou: a `Tensor` holding the generalized intersection over union.
  """
  with tf.name_scope('giou'):
    if yxyx:
      corners1, corners2 = box1, box2
    else:
      corners1 = xcycwh_to_yxyx(box1)
      corners2 = xcycwh_to_yxyx(box2)

    hull_min, hull_max, _ = smallest_encompassing_box(
        corners1, corners2, yxyx=True)
    overlap, combined = intersect_and_union(corners1, corners2, yxyx=True)
    iou = math_ops.divide_no_nan(overlap, combined)

    # Area of the smallest box enclosing both inputs.
    hull_area = tf.math.reduce_prod(hull_max - hull_min, axis=-1)

    # Share of the enclosing box that neither input covers.
    penalty = math_ops.divide_no_nan((hull_area - combined), hull_area)
    giou = iou - penalty
  return iou, giou


186
def compute_diou(box1, box2, beta=1.0, yxyx=False):
  """Computes the distance intersection over union of box1 and box2.

  The distance IoU is the plain IoU minus a penalty equal to the squared
  distance between the box centers divided by the squared diagonal of the
  smallest enclosing box, raised to the power `beta`.

  Args:
    box1: a `Tensor` whose last dimension has size 4 holding box coordinates.
    box2: a `Tensor` whose last dimension has size 4 holding box coordinates.
    beta: a `float` exponent applied to the center-distance penalty.
    yxyx: a `bool`. When True the inputs are read as
      (y_min, x_min, y_max, x_max); when False they are read as
      (x_center, y_center, width, height).

  Returns:
    A tuple `(iou, diou)`:
    iou: a `Tensor` holding the plain intersection over union.
    diou: a `Tensor` holding the distance intersection over union.
  """
  with tf.name_scope('diou'):
    # Both encodings are needed: corners for the overlap and enclosing box,
    # centers for the distance penalty.
    if yxyx:
      corners1, corners2 = box1, box2
      centered1 = yxyx_to_xcycwh(box1)
      centered2 = yxyx_to_xcycwh(box2)
    else:
      centered1, centered2 = box1, box2
      corners1 = xcycwh_to_yxyx(box1)
      corners2 = xcycwh_to_yxyx(box2)

    hull_min, hull_max, _ = smallest_encompassing_box(
        corners1, corners2, yxyx=True)
    overlap, combined = intersect_and_union(corners1, corners2, yxyx=True)
    iou = math_ops.divide_no_nan(overlap, combined)

    center1, _ = tf.split(centered1, 2, axis=-1)
    center2, _ = tf.split(centered2, 2, axis=-1)
    hull_extent = hull_max - hull_min

    # Squared center-to-center distance and squared enclosing-box diagonal.
    center_dist_sq = tf.reduce_sum((center1 - center2)**2, axis=-1)
    hull_diag_sq = tf.reduce_sum(hull_extent**2, axis=-1)

    penalty = math_ops.divide_no_nan(center_dist_sq, hull_diag_sq)
    diou = iou - penalty**beta
  return iou, diou
227
228


Abdullah Rashwan's avatar
Abdullah Rashwan committed
229
230
def compute_ciou(box1, box2, yxyx=False, darknet=False):
  """Computes the complete intersection over union of box1 and box2.

  The complete IoU is the plain IoU minus a center-distance penalty and minus
  a weighted aspect-ratio consistency penalty.

  Args:
    box1: a `Tensor` whose last dimension has size 4 holding box coordinates.
    box2: a `Tensor` whose last dimension has size 4 holding box coordinates.
    yxyx: a `bool`. When True the inputs are read as
      (y_min, x_min, y_max, x_max); when False they are read as
      (x_center, y_center, width, height).
    darknet: a `bool` indicating whether the calling function is the YOLO
      darknet loss. When True the aspect-ratio term is additionally scaled by
      the gradient-stopped value of box2's width^2 + height^2.

  Returns:
    A tuple `(iou, ciou)`:
    iou: a `Tensor` holding the plain intersection over union.
    ciou: a `Tensor` holding the complete intersection over union.
  """
  with tf.name_scope('ciou'):
    # Both encodings are needed: corners for the overlap and enclosing box,
    # center/size for the distance and aspect-ratio penalties.
    if yxyx:
      corners1, corners2 = box1, box2
      centered1 = yxyx_to_xcycwh(box1)
      centered2 = yxyx_to_xcycwh(box2)
    else:
      centered1, centered2 = box1, box2
      corners1 = xcycwh_to_yxyx(box1)
      corners2 = xcycwh_to_yxyx(box2)

    # Build the smallest encompassing box.
    hull_min, hull_max, _ = smallest_encompassing_box(
        corners1, corners2, yxyx=True)
    overlap, combined = intersect_and_union(corners1, corners2, yxyx=True)
    iou = math_ops.divide_no_nan(overlap, combined)

    center1, width1, height1 = tf.split(centered1, [2, 1, 1], axis=-1)
    center2, width2, height2 = tf.split(centered2, [2, 1, 1], axis=-1)
    hull_extent = hull_max - hull_min

    # Center regularization: squared center distance over squared diagonal of
    # the enclosing box.
    center_dist_sq = tf.reduce_sum((center1 - center2)**2, axis=-1)
    hull_diag_sq = tf.reduce_sum(hull_extent**2, axis=-1)
    center_penalty = math_ops.divide_no_nan(center_dist_sq, hull_diag_sq)

    # Compute aspect ratio consistency.
    ratio1 = math_ops.divide_no_nan(width1, height1)  # gt
    ratio2 = math_ops.divide_no_nan(width2, height2)  # pred
    angle_gap_sq = tf.squeeze(
        tf.math.pow(tf.math.atan(ratio2) - tf.math.atan(ratio1), 2), axis=-1)
    aspect_term = (4 / math.pi**2) * angle_gap_sq

    # Compute the aspect ratio weight, should be treated as a constant, hence
    # the stop_gradient. It is derived before any darknet scaling is applied.
    aspect_weight = tf.stop_gradient(
        math_ops.divide_no_nan(aspect_term, 1 - iou + aspect_term))

    if darknet:
      size_scale = tf.stop_gradient(tf.square(width2) + tf.square(height2))
      aspect_term = aspect_term * tf.squeeze(size_scale, axis=-1)

    ciou = iou - center_penalty - (aspect_term * aspect_weight)
  return iou, ciou
285
286


Vishnu Banna's avatar
Vishnu Banna committed
287
def aggregated_comparitive_iou(boxes1, boxes2=None, iou_type=0, beta=0.6):
  """Computes an IoU variant between every pair drawn from two box sets.

  Similar to bbox_overlap but far more versatile. Boxes are passed to the
  underlying IoU functions with `yxyx=True`, i.e. they are read as
  (y_min, x_min, y_max, x_max).

  Args:
    boxes1: a `Tensor` of shape [batch size, N, 4] representing the coordinates
      of boxes.
    boxes2: an optional `Tensor` of shape [batch size, N, 4] representing the
      coordinates of boxes. When None, `boxes1` is compared against itself.
    iou_type: selects the IoU version: 0 or 'diou' is the distance iou, 1 or
      'giou' is the general iou, 2 or 'ciou' is the complete iou, and any other
      value uses the standard iou.
    beta: `float` for the scaling quantity to apply to distance iou
      regularization; only used by the distance iou.

  Returns:
    iou: a `Tensor` holding the selected IoU variant for the pairwise
      combinations of `boxes1` and `boxes2`.
  """
  # Insert singleton axes so the two sets broadcast against each other.
  boxes1 = tf.expand_dims(boxes1, axis=-2)

  if boxes2 is None:
    # Self-comparison: swap the box axis and the singleton axis of boxes1.
    boxes2 = tf.transpose(boxes1, perm=(0, 2, 1, 3))
  else:
    boxes2 = tf.expand_dims(boxes2, axis=-3)

  # The diou/giou/ciou helpers return (iou, variant); only the variant is kept.
  if iou_type == 0 or iou_type == 'diou':
    return compute_diou(boxes1, boxes2, beta=beta, yxyx=True)[1]
  if iou_type == 1 or iou_type == 'giou':
    return compute_giou(boxes1, boxes2, yxyx=True)[1]
  if iou_type == 2 or iou_type == 'ciou':
    return compute_ciou(boxes1, boxes2, yxyx=True)[1]
  return compute_iou(boxes1, boxes2, yxyx=True)