# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Region Similarity Calculators."""

import tensorflow as tf


def area(box):
  """Returns the area of every box in `box`.

  B: batch_size
  N: number of boxes

  The last axis is split into four equal parts that are read, in order, as
  (y_min, x_min, y_max, x_max). No clamping is applied, so a box whose max
  coordinate is below its min coordinate yields a negative area.

  Args:
    box: a float Tensor with [N, 4], or [B, N, 4].

  Returns:
    a float Tensor with [N], or [B, N]
  """
  with tf.name_scope('Area'):
    # Each piece keeps a trailing axis of size 1: [N, 1] or [B, N, 1].
    y_min, x_min, y_max, x_max = tf.split(
        value=box, num_or_size_splits=4, axis=-1)
    heights = y_max - y_min
    widths = x_max - x_min
    # Drop the trailing singleton axis left over from the split.
    return tf.squeeze(heights * widths, axis=-1)


def intersection(gt_boxes, boxes):
  """Computes the overlap area of every (gt_box, box) pair.

  B: batch_size
  N: number of groundtruth boxes.
  M: number of anchor boxes.

  Args:
    gt_boxes: a float Tensor with [N, 4], or [B, N, 4]
    boxes: a float Tensor with [M, 4], or [B, M, 4]

  Returns:
    a float Tensor with shape [N, M] or [B, N, M] representing pairwise
      intersections. Pairs that do not overlap get 0.
  """
  with tf.name_scope('Intersection'):
    gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(
        value=gt_boxes, num_or_size_splits=4, axis=-1)
    box_y_min, box_x_min, box_y_max, box_x_max = tf.split(
        value=boxes, num_or_size_splits=4, axis=-1)

    # Swap the last two axes of the `boxes` coordinates ([M, 1] -> [1, M], or
    # [B, M, 1] -> [B, 1, M]) so they broadcast against the [.., N, 1]
    # groundtruth coordinates into a pairwise [.., N, M] grid.
    if len(boxes.shape) == 2:
      perm = [1, 0]
    else:
      perm = [0, 2, 1]

    def _pairwise_extent(gt_lo, gt_hi, box_lo, box_hi):
      """Returns the raw (possibly negative) overlap length along one axis."""
      upper = tf.minimum(gt_hi, tf.transpose(box_hi, perm))
      lower = tf.maximum(gt_lo, tf.transpose(box_lo, perm))
      return upper - lower

    # [N, M] or [B, N, M]
    raw_heights = _pairwise_extent(gt_y_min, gt_y_max, box_y_min, box_y_max)
    raw_widths = _pairwise_extent(gt_x_min, gt_x_max, box_x_min, box_x_max)

    # A negative extent means the pair is disjoint on that axis; clamp to 0.
    floor = tf.cast(0, raw_heights.dtype)
    clamped_heights = tf.maximum(floor, raw_heights)
    clamped_widths = tf.maximum(floor, raw_widths)
    return clamped_heights * clamped_widths


def iou(gt_boxes, boxes):
  """Computes pairwise intersection-over-union between box collections.

  B: batch_size
  N: number of boxes in `gt_boxes`.
  M: number of boxes in `boxes`.

  Args:
    gt_boxes: a float Tensor with [N, 4], or [B, N, 4].
    boxes: a float Tensor with [M, 4], or [B, M, 4].

  Returns:
    a Tensor with shape [N, M], or [B, N, M], representing pairwise iou
      scores. Pairs whose intersection is exactly 0 score 0.
  """
  with tf.name_scope('IOU'):
    overlap = intersection(gt_boxes, boxes)
    gt_area = area(gt_boxes)
    box_area = area(boxes)

    # `box_area` is [M] when unbatched and [B, M] when batched; insert the
    # axis that lines it up against the N axis of `gt_area`.
    if len(box_area.shape) == 2:
      n_axis = 1
    else:
      n_axis = 0
    gt_area = tf.expand_dims(gt_area, -1)  # [N, 1] or [B, N, 1]
    box_area = tf.expand_dims(box_area, n_axis)  # [1, M] or [B, 1, M]

    union = gt_area + box_area
    union = union - overlap

    # Where nothing overlaps, return 0 instead of the quotient.
    no_overlap = tf.equal(overlap, 0.0)
    return tf.where(
        no_overlap, tf.zeros_like(overlap), tf.truediv(overlap, union))


class IouSimilarity:
  """Class to compute similarity based on Intersection over Union (IOU) metric.

  Attributes:
    mask_val: scalar written into the similarity matrix at every position
      that a supplied mask marks as True.
  """

  def __init__(self, mask_val=-1):
    """Initializes the similarity calculator.

    Args:
      mask_val: scalar placed in the output wherever a mask is True. It is
        cast to the dtype of the IOU matrix when used.
    """
    self.mask_val = mask_val

  def __call__(self, boxes_1, boxes_2, boxes_1_masks=None, boxes_2_masks=None):
    """Computes pairwise IOU similarity between two collections of boxes.

    B: batch_size
    N: number of boxes in `boxes_1`.
    M: number of boxes in `boxes_2`.

    Args:
      boxes_1: a Tensor with N or B * N boxes. It is cast to float32.
      boxes_2: a Tensor with M or B * M boxes, the rank must be less than
        or equal to rank of `boxes_1`. It is cast to float32.
      boxes_1_masks: a boolean Tensor with one entry per box of `boxes_1`.
        Rows of the output whose entry is True are set to `mask_val`.
        Optional.
      boxes_2_masks: a boolean Tensor with one entry per box of `boxes_2`.
        Columns of the output whose entry is True are set to `mask_val`.
        Optional.

    Returns:
      A float32 Tensor with shape [N, M] or [B, N, M] representing pairwise
        iou scores, one row per box of `boxes_1` and one column per box of
        `boxes_2`.

    Raises:
      ValueError: if either input is not rank 2 or 3, or if `boxes_1` is
        unbatched (rank 2) while `boxes_2` is batched (rank 3).

    Input shape:
      boxes_1: [N, 4], or [B, N, 4]
      boxes_2: [M, 4], or [B, M, 4]
      boxes_1_masks: [N, 1], or [B, N, 1]
      boxes_2_masks: [M, 1], or [B, M, 1]

    Output shape:
      [N, M], or [B, N, M]
    """
    boxes_1 = tf.cast(boxes_1, tf.float32)
    boxes_2 = tf.cast(boxes_2, tf.float32)

    boxes_1_rank = len(boxes_1.shape)
    boxes_2_rank = len(boxes_2.shape)
    if boxes_1_rank < 2 or boxes_1_rank > 3:
      raise ValueError(
          '`boxes_1` must be rank 2 or 3, got {}'.format(boxes_1_rank))
    if boxes_2_rank < 2 or boxes_2_rank > 3:
      raise ValueError(
          '`boxes_2` must be rank 2 or 3, got {}'.format(boxes_2_rank))
    if boxes_1_rank < boxes_2_rank:
      raise ValueError('`boxes_1` is unbatched while `boxes_2` is '
                       'batched is not a valid use case, got `boxes_1` '
                       'rank {}, and `boxes_2` rank {}'.format(
                           boxes_1_rank, boxes_2_rank))

    result = iou(boxes_1, boxes_2)
    if boxes_1_masks is None and boxes_2_masks is None:
      return result

    mask_val_t = tf.cast(self.mask_val, result.dtype) * tf.ones_like(result)
    # Move the `boxes_2` mask from a column ([M, 1] / [B, M, 1]) to a row
    # ([1, M] / [B, 1, M]) so it broadcasts along the columns of `result`.
    perm = [1, 0] if boxes_2_rank == 2 else [0, 2, 1]
    if boxes_1_masks is not None and boxes_2_masks is not None:
      background_mask = tf.logical_or(boxes_1_masks,
                                      tf.transpose(boxes_2_masks, perm))
    elif boxes_1_masks is not None:
      background_mask = boxes_1_masks
    else:
      # Use the transposed mask directly and let `tf.where` broadcast it over
      # the N axis. OR-ing it with a [B, M] tensor of zeros would broadcast
      # [B, M] against [B, 1, M] into [B, B, M], which does not match the
      # [B, N, M] result in the batched case.
      background_mask = tf.transpose(boxes_2_masks, perm)
    return tf.where(background_mask, mask_val_t, result)