"cpp_onnx/vscode:/vscode.git/clone" did not exist on "dee81c34eb4b0de82a4ddaf3a6bf1430f40e2a33"
Unverified Commit 83a9a239 authored by Taylor Robie's avatar Taylor Robie Committed by GitHub
Browse files

remove fork of object_detection created for MLPerf. (#4840)

parent ad3526a9
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base box coder.
Box coders convert between coordinate frames, namely image-centric
(with (0,0) on the top left of image) and anchor-centric (with (0,0) being
defined by a specific anchor).
Users of a BoxCoder can call two methods:
encode: which encodes a box with respect to a given anchor
(or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
decode: which inverts this encoding with a decode operation.
In both cases, the arguments are assumed to be in 1-1 correspondence already;
it is not the job of a BoxCoder to perform matching.
"""
from abc import ABCMeta
from abc import abstractmethod
from abc import abstractproperty
import tensorflow as tf
# Box coder types.
FASTER_RCNN = 'faster_rcnn'
KEYPOINT = 'keypoint'
MEAN_STDDEV = 'mean_stddev'
SQUARE = 'square'
class BoxCoder(object):
"""Abstract base class for box coder."""
__metaclass__ = ABCMeta
@abstractproperty
def code_size(self):
"""Return the size of each code.
This number is a constant and should agree with the output of the `encode`
op (e.g. if rel_codes is the output of self.encode(...), then it should have
shape [N, code_size()]). This abstractproperty should be overridden by
implementations.
Returns:
an integer constant
"""
pass
def encode(self, boxes, anchors):
"""Encode a box list relative to an anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded
anchors: BoxList of N anchors
Returns:
a tensor representing N relative-encoded boxes
"""
with tf.name_scope('Encode'):
return self._encode(boxes, anchors)
def decode(self, rel_codes, anchors):
"""Decode boxes that are encoded relative to an anchor collection.
Args:
rel_codes: a tensor representing N relative-encoded boxes
anchors: BoxList of anchors
Returns:
boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
with corners y_min, x_min, y_max, x_max)
"""
with tf.name_scope('Decode'):
return self._decode(rel_codes, anchors)
@abstractmethod
def _encode(self, boxes, anchors):
"""Method to be overriden by implementations.
Args:
boxes: BoxList holding N boxes to be encoded
anchors: BoxList of N anchors
Returns:
a tensor representing N relative-encoded boxes
"""
pass
@abstractmethod
def _decode(self, rel_codes, anchors):
"""Method to be overriden by implementations.
Args:
rel_codes: a tensor representing N relative-encoded boxes
anchors: BoxList of anchors
Returns:
boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
with corners y_min, x_min, y_max, x_max)
"""
pass
def batch_decode(encoded_boxes, box_coder, anchors):
"""Decode a batch of encoded boxes.
This op takes a batch of encoded bounding boxes and transforms
them to a batch of bounding boxes specified by their corners in
the order of [y_min, x_min, y_max, x_max].
Args:
encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
code_size] representing the location of the objects.
box_coder: a BoxCoder object.
anchors: a BoxList of anchors used to encode `encoded_boxes`.
Returns:
decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
coder_size] representing the corners of the objects in the order
of [y_min, x_min, y_max, x_max].
Raises:
ValueError: if batch sizes of the inputs are inconsistent, or if
the number of anchors inferred from encoded_boxes and anchors are
inconsistent.
"""
encoded_boxes.get_shape().assert_has_rank(3)
if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
raise ValueError('The number of anchors inferred from encoded_boxes'
' and anchors are inconsistent: shape[1] of encoded_boxes'
' %s should be equal to the number of anchors: %s.' %
(encoded_boxes.get_shape()[1].value,
anchors.num_boxes_static()))
decoded_boxes = tf.stack([
box_coder.decode(boxes, anchors).get()
for boxes in tf.unstack(encoded_boxes)
])
return decoded_boxes
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_coder."""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
class MockBoxCoder(box_coder.BoxCoder):
"""Test BoxCoder that encodes/decodes using the multiply-by-two function."""
def code_size(self):
return 4
def _encode(self, boxes, anchors):
return 2.0 * boxes.get()
def _decode(self, rel_codes, anchors):
return box_list.BoxList(rel_codes / 2.0)
class BoxCoderTest(tf.test.TestCase):
def test_batch_decode(self):
mock_anchor_corners = tf.constant(
[[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32)
mock_anchors = box_list.BoxList(mock_anchor_corners)
mock_box_coder = MockBoxCoder()
expected_boxes = [[[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]],
[[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]]]
encoded_boxes_list = [mock_box_coder.encode(
box_list.BoxList(tf.constant(boxes)), mock_anchors)
for boxes in expected_boxes]
encoded_boxes = tf.stack(encoded_boxes_list)
decoded_boxes = box_coder.batch_decode(
encoded_boxes, mock_box_coder, mock_anchors)
with self.test_session() as sess:
decoded_boxes_result = sess.run(decoded_boxes)
self.assertAllClose(expected_boxes, decoded_boxes_result)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List definition.
BoxList represents a list of bounding boxes as tensorflow
tensors, where each bounding box is represented as a row of 4 numbers,
[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes
within a given list correspond to a single image. See also
box_list_ops.py for common box related operations (such as area, iou, etc).
Optionally, users can add additional related fields (such as weights).
We assume the following things to be true about fields:
* they correspond to boxes in the box_list along the 0th dimension
* they have inferrable rank at graph construction time
* all dimensions except for possibly the 0th can be inferred
(i.e., not None) at graph construction time.
Some other notes:
* Following tensorflow conventions, we use height, width ordering,
and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
* Tensors are always provided as (flat) [N, 4] tensors.
"""
import tensorflow as tf
class BoxList(object):
"""Box collection."""
def __init__(self, boxes):
"""Constructs box collection.
Args:
boxes: a tensor of shape [N, 4] representing box corners
Raises:
ValueError: if invalid dimensions for bbox data or if bbox data is not in
float32 format.
"""
if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
raise ValueError('Invalid dimensions for box data.')
if boxes.dtype != tf.float32:
raise ValueError('Invalid tensor type: should be tf.float32')
self.data = {'boxes': boxes}
def num_boxes(self):
"""Returns number of boxes held in collection.
Returns:
a tensor representing the number of boxes held in the collection.
"""
return tf.shape(self.data['boxes'])[0]
def num_boxes_static(self):
"""Returns number of boxes held in collection.
This number is inferred at graph construction time rather than run-time.
Returns:
Number of boxes held in collection (integer) or None if this is not
inferrable at graph construction time.
"""
return self.data['boxes'].get_shape()[0].value
def get_all_fields(self):
"""Returns all fields."""
return self.data.keys()
def get_extra_fields(self):
"""Returns all non-box fields (i.e., everything not named 'boxes')."""
return [k for k in self.data.keys() if k != 'boxes']
def add_field(self, field, field_data):
"""Add field to box list.
This method can be used to add related box data such as
weights/labels, etc.
Args:
field: a string key to access the data via `get`
field_data: a tensor containing the data to store in the BoxList
"""
self.data[field] = field_data
def has_field(self, field):
return field in self.data
def get(self):
"""Convenience function for accessing box coordinates.
Returns:
a tensor with shape [N, 4] representing box coordinates.
"""
return self.get_field('boxes')
def set(self, boxes):
"""Convenience function for setting box coordinates.
Args:
boxes: a tensor of shape [N, 4] representing box corners
Raises:
ValueError: if invalid dimensions for bbox data
"""
if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
raise ValueError('Invalid dimensions for box data.')
self.data['boxes'] = boxes
def get_field(self, field):
"""Accesses a box collection and associated fields.
This function returns specified field with object; if no field is specified,
it returns the box coordinates.
Args:
field: this optional string parameter can be used to specify
a related field to be accessed.
Returns:
a tensor representing the box collection or an associated field.
Raises:
ValueError: if invalid field
"""
if not self.has_field(field):
raise ValueError('field ' + str(field) + ' does not exist')
return self.data[field]
def set_field(self, field, value):
"""Sets the value of a field.
Updates the field of a box_list with a given value.
Args:
field: (string) name of the field to set value.
value: the value to assign to the field.
Raises:
ValueError: if the box_list does not have specified field.
"""
if not self.has_field(field):
raise ValueError('field %s does not exist' % field)
self.data[field] = value
def get_center_coordinates_and_sizes(self, scope=None):
"""Computes the center coordinates, height and width of the boxes.
Args:
scope: name scope of the function.
Returns:
a list of 4 1-D tensors [ycenter, xcenter, height, width].
"""
with tf.name_scope(scope, 'get_center_coordinates_and_sizes'):
box_corners = self.get()
ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(box_corners))
width = xmax - xmin
height = ymax - ymin
ycenter = ymin + height / 2.
xcenter = xmin + width / 2.
return [ycenter, xcenter, height, width]
def transpose_coordinates(self, scope=None):
"""Transpose the coordinate representation in a boxlist.
Args:
scope: name scope of the function.
"""
with tf.name_scope(scope, 'transpose_coordinates'):
y_min, x_min, y_max, x_max = tf.split(
value=self.get(), num_or_size_splits=4, axis=1)
self.set(tf.concat([x_min, y_min, x_max, y_max], 1))
def as_tensor_dict(self, fields=None):
"""Retrieves specified fields as a dictionary of tensors.
Args:
fields: (optional) list of fields to return in the dictionary.
If None (default), all fields are returned.
Returns:
tensor_dict: A dictionary of tensors specified by fields.
Raises:
ValueError: if specified field is not contained in boxlist.
"""
tensor_dict = {}
if fields is None:
fields = self.get_all_fields()
for field in fields:
if not self.has_field(field):
raise ValueError('boxlist must contain all specified fields')
tensor_dict[field] = self.get_field(field)
return tensor_dict
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List operations.
Example box operations that are supported:
* areas: compute bounding box areas
* iou: pairwise intersection-over-union scores
* sq_dist: pairwise distances between bounding boxes
Whenever box_list_ops functions output a BoxList, the fields of the incoming
BoxList are retained unless documented otherwise.
"""
import tensorflow as tf
from object_detection.core import box_list
from object_detection.utils import shape_utils
class SortOrder(object):
"""Enum class for sort order.
Attributes:
ascend: ascend order.
descend: descend order.
"""
ascend = 1
descend = 2
def area(boxlist, scope=None):
"""Computes area of boxes.
Args:
boxlist: BoxList holding N boxes
scope: name scope.
Returns:
a tensor with shape [N] representing box areas.
"""
with tf.name_scope(scope, 'Area'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
def height_width(boxlist, scope=None):
"""Computes height and width of boxes in boxlist.
Args:
boxlist: BoxList holding N boxes
scope: name scope.
Returns:
Height: A tensor with shape [N] representing box heights.
Width: A tensor with shape [N] representing box widths.
"""
with tf.name_scope(scope, 'HeightWidth'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1])
def scale(boxlist, y_scale, x_scale, scope=None):
"""scale box coordinates in x and y dimensions.
Args:
boxlist: BoxList holding N boxes
y_scale: (float) scalar tensor
x_scale: (float) scalar tensor
scope: name scope.
Returns:
boxlist: BoxList holding N boxes
"""
with tf.name_scope(scope, 'Scale'):
y_scale = tf.cast(y_scale, tf.float32)
x_scale = tf.cast(x_scale, tf.float32)
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
y_min = y_scale * y_min
y_max = y_scale * y_max
x_min = x_scale * x_min
x_max = x_scale * x_max
scaled_boxlist = box_list.BoxList(
tf.concat([y_min, x_min, y_max, x_max], 1))
return _copy_extra_fields(scaled_boxlist, boxlist)
def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
"""Clip bounding boxes to a window.
This op clips any input bounding boxes (represented by bounding box
corners) to a window, optionally filtering out boxes that do not
overlap at all with the window.
Args:
boxlist: BoxList holding M_in boxes
window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
window to which the op should clip boxes.
filter_nonoverlapping: whether to filter out boxes that do not overlap at
all with the window.
scope: name scope.
Returns:
a BoxList holding M_out boxes where M_out <= M_in
"""
with tf.name_scope(scope, 'ClipToWindow'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
clipped = box_list.BoxList(
tf.concat([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped],
1))
clipped = _copy_extra_fields(clipped, boxlist)
if filter_nonoverlapping:
areas = area(clipped)
nonzero_area_indices = tf.cast(
tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
clipped = gather(clipped, nonzero_area_indices)
return clipped
def prune_outside_window(boxlist, window, scope=None):
"""Prunes bounding boxes that fall outside a given window.
This function prunes bounding boxes that even partially fall outside the given
window. See also clip_to_window which only prunes bounding boxes that fall
completely outside the window, and clips any bounding boxes that partially
overflow.
Args:
boxlist: a BoxList holding M_in boxes.
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
of the window
scope: name scope.
Returns:
pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
in the input tensor.
"""
with tf.name_scope(scope, 'PruneOutsideWindow'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
coordinate_violations = tf.concat([
tf.less(y_min, win_y_min), tf.less(x_min, win_x_min),
tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max)
], 1)
valid_indices = tf.reshape(
tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
return gather(boxlist, valid_indices), valid_indices
def prune_completely_outside_window(boxlist, window, scope=None):
"""Prunes bounding boxes that fall completely outside of the given window.
The function clip_to_window prunes bounding boxes that fall
completely outside the window, but also clips any bounding boxes that
partially overflow. This function does not clip partially overflowing boxes.
Args:
boxlist: a BoxList holding M_in boxes.
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
of the window
scope: name scope.
Returns:
pruned_boxlist: a new BoxList with all bounding boxes partially or fully in
the window.
valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
in the input tensor.
"""
with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
coordinate_violations = tf.concat([
tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
], 1)
valid_indices = tf.reshape(
tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
return gather(boxlist, valid_indices), valid_indices
def intersection(boxlist1, boxlist2, scope=None):
"""Compute pairwise intersection areas between boxes.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
scope: name scope.
Returns:
a tensor with shape [N, M] representing pairwise intersections
"""
with tf.name_scope(scope, 'Intersection'):
y_min1, x_min1, y_max1, x_max1 = tf.split(
value=boxlist1.get(), num_or_size_splits=4, axis=1)
y_min2, x_min2, y_max2, x_max2 = tf.split(
value=boxlist2.get(), num_or_size_splits=4, axis=1)
all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
return intersect_heights * intersect_widths
def matched_intersection(boxlist1, boxlist2, scope=None):
"""Compute intersection areas between corresponding boxes in two boxlists.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding N boxes
scope: name scope.
Returns:
a tensor with shape [N] representing pairwise intersections
"""
with tf.name_scope(scope, 'MatchedIntersection'):
y_min1, x_min1, y_max1, x_max1 = tf.split(
value=boxlist1.get(), num_or_size_splits=4, axis=1)
y_min2, x_min2, y_max2, x_max2 = tf.split(
value=boxlist2.get(), num_or_size_splits=4, axis=1)
min_ymax = tf.minimum(y_max1, y_max2)
max_ymin = tf.maximum(y_min1, y_min2)
intersect_heights = tf.maximum(0.0, min_ymax - max_ymin)
min_xmax = tf.minimum(x_max1, x_max2)
max_xmin = tf.maximum(x_min1, x_min2)
intersect_widths = tf.maximum(0.0, min_xmax - max_xmin)
return tf.reshape(intersect_heights * intersect_widths, [-1])
def iou(boxlist1, boxlist2, scope=None):
"""Computes pairwise intersection-over-union between box collections.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
scope: name scope.
Returns:
a tensor with shape [N, M] representing pairwise iou scores.
"""
with tf.name_scope(scope, 'IOU'):
intersections = intersection(boxlist1, boxlist2)
areas1 = area(boxlist1)
areas2 = area(boxlist2)
unions = (
tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
return tf.where(
tf.equal(intersections, 0.0),
tf.zeros_like(intersections), tf.truediv(intersections, unions))
def matched_iou(boxlist1, boxlist2, scope=None):
"""Compute intersection-over-union between corresponding boxes in boxlists.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding N boxes
scope: name scope.
Returns:
a tensor with shape [N] representing pairwise iou scores.
"""
with tf.name_scope(scope, 'MatchedIOU'):
intersections = matched_intersection(boxlist1, boxlist2)
areas1 = area(boxlist1)
areas2 = area(boxlist2)
unions = areas1 + areas2 - intersections
return tf.where(
tf.equal(intersections, 0.0),
tf.zeros_like(intersections), tf.truediv(intersections, unions))
def ioa(boxlist1, boxlist2, scope=None):
"""Computes pairwise intersection-over-area between box collections.
intersection-over-area (IOA) between two boxes box1 and box2 is defined as
their intersection area over box2's area. Note that ioa is not symmetric,
that is, ioa(box1, box2) != ioa(box2, box1).
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
scope: name scope.
Returns:
a tensor with shape [N, M] representing pairwise ioa scores.
"""
with tf.name_scope(scope, 'IOA'):
intersections = intersection(boxlist1, boxlist2)
areas = tf.expand_dims(area(boxlist2), 0)
return tf.truediv(intersections, areas)
def prune_non_overlapping_boxes(
boxlist1, boxlist2, min_overlap=0.0, scope=None):
"""Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
For each box in boxlist1, we want its IOA to be more than minoverlap with
at least one of the boxes in boxlist2. If it does not, we remove it.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
min_overlap: Minimum required overlap between boxes, to count them as
overlapping.
scope: name scope.
Returns:
new_boxlist1: A pruned boxlist with size [N', 4].
keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
first input BoxList `boxlist1`.
"""
with tf.name_scope(scope, 'PruneNonOverlappingBoxes'):
ioa_ = ioa(boxlist2, boxlist1) # [M, N] tensor
ioa_ = tf.reduce_max(ioa_, reduction_indices=[0]) # [N] tensor
keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap))
keep_inds = tf.squeeze(tf.where(keep_bool), squeeze_dims=[1])
new_boxlist1 = gather(boxlist1, keep_inds)
return new_boxlist1, keep_inds
def prune_small_boxes(boxlist, min_side, scope=None):
"""Prunes small boxes in the boxlist which have a side smaller than min_side.
Args:
boxlist: BoxList holding N boxes.
min_side: Minimum width AND height of box to survive pruning.
scope: name scope.
Returns:
A pruned boxlist.
"""
with tf.name_scope(scope, 'PruneSmallBoxes'):
height, width = height_width(boxlist)
is_valid = tf.logical_and(tf.greater_equal(width, min_side),
tf.greater_equal(height, min_side))
return gather(boxlist, tf.reshape(tf.where(is_valid), [-1]))
def change_coordinate_frame(boxlist, window, scope=None):
"""Change coordinate frame of the boxlist to be relative to window's frame.
Given a window of the form [ymin, xmin, ymax, xmax],
changes bounding box coordinates from boxlist to be relative to this window
(e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).
An example use case is data augmentation: where we are given groundtruth
boxes (boxlist) and would like to randomly crop the image to some
window (window). In this case we need to change the coordinate frame of
each groundtruth box to be relative to this new window.
Args:
boxlist: A BoxList object holding N boxes.
window: A rank 1 tensor [4].
scope: name scope.
Returns:
Returns a BoxList object with N boxes.
"""
with tf.name_scope(scope, 'ChangeCoordinateFrame'):
win_height = window[2] - window[0]
win_width = window[3] - window[1]
boxlist_new = scale(box_list.BoxList(
boxlist.get() - [window[0], window[1], window[0], window[1]]),
1.0 / win_height, 1.0 / win_width)
boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
return boxlist_new
def sq_dist(boxlist1, boxlist2, scope=None):
"""Computes the pairwise squared distances between box corners.
This op treats each box as if it were a point in a 4d Euclidean space and
computes pairwise squared distances.
Mathematically, we are given two matrices of box coordinates X and Y,
where X(i,:) is the i'th row of X, containing the 4 numbers defining the
corners of the i'th box in boxlist1. Similarly Y(j,:) corresponds to
boxlist2. We compute
Z(i,j) = ||X(i,:) - Y(j,:)||^2
= ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:),
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
scope: name scope.
Returns:
a tensor with shape [N, M] representing pairwise distances
"""
with tf.name_scope(scope, 'SqDist'):
sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
innerprod = tf.matmul(boxlist1.get(), boxlist2.get(),
transpose_a=False, transpose_b=True)
return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod
def boolean_mask(boxlist, indicator, fields=None, scope=None):
"""Select boxes from BoxList according to indicator and return new BoxList.
`boolean_mask` returns the subset of boxes that are marked as "True" by the
indicator tensor. By default, `boolean_mask` returns boxes corresponding to
the input index list, as well as all additional fields stored in the boxlist
(indexing into the first dimension). However one can optionally only draw
from a subset of fields.
Args:
boxlist: BoxList holding N boxes
indicator: a rank-1 boolean tensor
fields: (optional) list of fields to also gather from. If None (default),
all fields are gathered from. Pass an empty fields list to only gather
the box coordinates.
scope: name scope.
Returns:
subboxlist: a BoxList corresponding to the subset of the input BoxList
specified by indicator
Raises:
ValueError: if `indicator` is not a rank-1 boolean tensor.
"""
with tf.name_scope(scope, 'BooleanMask'):
if indicator.shape.ndims != 1:
raise ValueError('indicator should have rank 1')
if indicator.dtype != tf.bool:
raise ValueError('indicator should be a boolean tensor')
subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
if fields is None:
fields = boxlist.get_extra_fields()
for field in fields:
if not boxlist.has_field(field):
raise ValueError('boxlist must contain all specified fields')
subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
subboxlist.add_field(field, subfieldlist)
return subboxlist
def gather(boxlist, indices, fields=None, scope=None):
"""Gather boxes from BoxList according to indices and return new BoxList.
By default, `gather` returns boxes corresponding to the input index list, as
well as all additional fields stored in the boxlist (indexing into the
first dimension). However one can optionally only gather from a
subset of fields.
Args:
boxlist: BoxList holding N boxes
indices: a rank-1 tensor of type int32 / int64
fields: (optional) list of fields to also gather from. If None (default),
all fields are gathered from. Pass an empty fields list to only gather
the box coordinates.
scope: name scope.
Returns:
subboxlist: a BoxList corresponding to the subset of the input BoxList
specified by indices
Raises:
ValueError: if specified field is not contained in boxlist or if the
indices are not of type int32
"""
with tf.name_scope(scope, 'Gather'):
if len(indices.shape.as_list()) != 1:
raise ValueError('indices should have rank 1')
if indices.dtype != tf.int32 and indices.dtype != tf.int64:
raise ValueError('indices should be an int32 / int64 tensor')
subboxlist = box_list.BoxList(tf.gather(boxlist.get(), indices))
if fields is None:
fields = boxlist.get_extra_fields()
for field in fields:
if not boxlist.has_field(field):
raise ValueError('boxlist must contain all specified fields')
subfieldlist = tf.gather(boxlist.get_field(field), indices)
subboxlist.add_field(field, subfieldlist)
return subboxlist
def concatenate(boxlists, fields=None, scope=None):
"""Concatenate list of BoxLists.
This op concatenates a list of input BoxLists into a larger BoxList. It also
handles concatenation of BoxList fields as long as the field tensor shapes
are equal except for the first dimension.
Args:
boxlists: list of BoxList objects
fields: optional list of fields to also concatenate. By default, all
fields from the first BoxList in the list are included in the
concatenation.
scope: name scope.
Returns:
a BoxList with number of boxes equal to
sum([boxlist.num_boxes() for boxlist in BoxList])
Raises:
ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
contains non BoxList objects), or if requested fields are not contained in
all boxlists
"""
with tf.name_scope(scope, 'Concatenate'):
if not isinstance(boxlists, list):
raise ValueError('boxlists should be a list')
if not boxlists:
raise ValueError('boxlists should have nonzero length')
for boxlist in boxlists:
if not isinstance(boxlist, box_list.BoxList):
raise ValueError('all elements of boxlists should be BoxList objects')
concatenated = box_list.BoxList(
tf.concat([boxlist.get() for boxlist in boxlists], 0))
if fields is None:
fields = boxlists[0].get_extra_fields()
for field in fields:
first_field_shape = boxlists[0].get_field(field).get_shape().as_list()
first_field_shape[0] = -1
if None in first_field_shape:
raise ValueError('field %s must have fully defined shape except for the'
' 0th dimension.' % field)
for boxlist in boxlists:
if not boxlist.has_field(field):
raise ValueError('boxlist must contain all requested fields')
field_shape = boxlist.get_field(field).get_shape().as_list()
field_shape[0] = -1
if field_shape != first_field_shape:
raise ValueError('field %s must have same shape for all boxlists '
'except for the 0th dimension.' % field)
concatenated_field = tf.concat(
[boxlist.get_field(field) for boxlist in boxlists], 0)
concatenated.add_field(field, concatenated_field)
return concatenated
def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
"""Sort boxes and associated fields according to a scalar field.
A common use case is reordering the boxes according to descending scores.
Args:
boxlist: BoxList holding N boxes.
field: A BoxList field for sorting and reordering the BoxList.
order: (Optional) descend or ascend. Default is descend.
scope: name scope.
Returns:
sorted_boxlist: A sorted BoxList with the field in the specified order.
Raises:
ValueError: if specified field does not exist
ValueError: if the order is not either descend or ascend
"""
with tf.name_scope(scope, 'SortByField'):
if order != SortOrder.descend and order != SortOrder.ascend:
raise ValueError('Invalid sort order')
field_to_sort = boxlist.get_field(field)
if len(field_to_sort.shape.as_list()) != 1:
raise ValueError('Field should have rank 1')
num_boxes = boxlist.num_boxes()
num_entries = tf.size(field_to_sort)
length_assert = tf.Assert(
tf.equal(num_boxes, num_entries),
['Incorrect field size: actual vs expected.', num_entries, num_boxes])
with tf.control_dependencies([length_assert]):
# TODO(derekjchow): Remove with tf.device when top_k operation runs
# correctly on GPU.
with tf.device('/cpu:0'):
_, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True)
if order == SortOrder.ascend:
sorted_indices = tf.reverse_v2(sorted_indices, [0])
return gather(boxlist, sorted_indices)
def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
"""Overlay bounding box list on image.
Currently this visualization plots a 1 pixel thick red bounding box on top
of the image. Note that tf.image.draw_bounding_boxes essentially is
1 indexed.
Args:
image: an image tensor with shape [height, width, 3]
boxlist: a BoxList
normalized: (boolean) specify whether corners are to be interpreted
as absolute coordinates in image space or normalized with respect to the
image size.
scope: name scope.
Returns:
image_and_boxes: an image tensor with shape [height, width, 3]
"""
with tf.name_scope(scope, 'VisualizeBoxesInImage'):
if not normalized:
height, width, _ = tf.unstack(tf.shape(image))
boxlist = scale(boxlist,
1.0 / tf.cast(height, tf.float32),
1.0 / tf.cast(width, tf.float32))
corners = tf.expand_dims(boxlist.get(), 0)
image = tf.expand_dims(image, 0)
return tf.squeeze(tf.image.draw_bounding_boxes(image, corners), [0])
def filter_field_value_equals(boxlist, field, value, scope=None):
"""Filter to keep only boxes with field entries equal to the given value.
Args:
boxlist: BoxList holding N boxes.
field: field name for filtering.
value: scalar value.
scope: name scope.
Returns:
a BoxList holding M boxes where M <= N
Raises:
ValueError: if boxlist not a BoxList object or if it does not have
the specified field.
"""
with tf.name_scope(scope, 'FilterFieldValueEquals'):
if not isinstance(boxlist, box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field(field):
raise ValueError('boxlist must contain the specified field')
filter_field = boxlist.get_field(field)
gather_index = tf.reshape(tf.where(tf.equal(filter_field, value)), [-1])
return gather(boxlist, gather_index)
def filter_greater_than(boxlist, thresh, scope=None):
"""Filter to keep only boxes with score exceeding a given threshold.
This op keeps the collection of boxes whose corresponding scores are
greater than the input threshold.
TODO(jonathanhuang): Change function name to filter_scores_greater_than
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
representing detection scores.
thresh: scalar threshold
scope: name scope.
Returns:
a BoxList holding M boxes where M <= N
Raises:
ValueError: if boxlist not a BoxList object or if it does not
have a scores field
"""
with tf.name_scope(scope, 'FilterGreaterThan'):
if not isinstance(boxlist, box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field('scores'):
raise ValueError('input boxlist must have \'scores\' field')
scores = boxlist.get_field('scores')
if len(scores.shape.as_list()) > 2:
raise ValueError('Scores should have rank 1 or 2')
if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1:
raise ValueError('Scores should have rank 1 or have shape '
'consistent with [None, 1]')
high_score_indices = tf.cast(tf.reshape(
tf.where(tf.greater(scores, thresh)),
[-1]), tf.int32)
return gather(boxlist, high_score_indices)
def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
"""Non maximum suppression.
This op greedily selects a subset of detection bounding boxes, pruning
away boxes that have high IOU (intersection over union) overlap (> thresh)
with already selected boxes. Note that this only works for a single class ---
to apply NMS to multi-class predictions, use MultiClassNonMaxSuppression.
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
representing detection scores.
thresh: scalar threshold
max_output_size: maximum number of retained boxes
scope: name scope.
Returns:
a BoxList holding M boxes where M <= max_output_size
Raises:
ValueError: if thresh is not in [0, 1]
"""
with tf.name_scope(scope, 'NonMaxSuppression'):
if not 0 <= thresh <= 1.0:
raise ValueError('thresh must be between 0 and 1')
if not isinstance(boxlist, box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field('scores'):
raise ValueError('input boxlist must have \'scores\' field')
selected_indices = tf.image.non_max_suppression(
boxlist.get(), boxlist.get_field('scores'),
max_output_size, iou_threshold=thresh)
return gather(boxlist, selected_indices)
def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
"""Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
Args:
boxlist_to_copy_to: BoxList to which extra fields are copied.
boxlist_to_copy_from: BoxList from which fields are copied.
Returns:
boxlist_to_copy_to with extra fields.
"""
for field in boxlist_to_copy_from.get_extra_fields():
boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
return boxlist_to_copy_to
def to_normalized_coordinates(boxlist, height, width,
check_range=True, scope=None):
"""Converts absolute box coordinates to normalized coordinates in [0, 1].
Usually one uses the dynamic shape of the image or conv-layer tensor:
boxlist = box_list_ops.to_normalized_coordinates(boxlist,
tf.shape(images)[1],
tf.shape(images)[2]),
This function raises an assertion failed error at graph execution time when
the maximum coordinate is smaller than 1.01 (which means that coordinates are
already normalized). The value 1.01 is to deal with small rounding errors.
Args:
boxlist: BoxList with coordinates in terms of pixel-locations.
height: Maximum value for height of absolute box coordinates.
width: Maximum value for width of absolute box coordinates.
check_range: If True, checks if the coordinates are normalized or not.
scope: name scope.
Returns:
boxlist with normalized coordinates in [0, 1].
"""
with tf.name_scope(scope, 'ToNormalizedCoordinates'):
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
if check_range:
max_val = tf.reduce_max(boxlist.get())
max_assert = tf.Assert(tf.greater(max_val, 1.01),
['max value is lower than 1.01: ', max_val])
with tf.control_dependencies([max_assert]):
width = tf.identity(width)
return scale(boxlist, 1 / height, 1 / width)
def to_absolute_coordinates(boxlist,
height,
width,
check_range=True,
maximum_normalized_coordinate=1.1,
scope=None):
"""Converts normalized box coordinates to absolute pixel coordinates.
This function raises an assertion failed error when the maximum box coordinate
value is larger than maximum_normalized_coordinate (in which case coordinates
are already absolute).
Args:
boxlist: BoxList with coordinates in range [0, 1].
height: Maximum value for height of absolute box coordinates.
width: Maximum value for width of absolute box coordinates.
check_range: If True, checks if the coordinates are normalized or not.
maximum_normalized_coordinate: Maximum coordinate value to be considered
as normalized, default to 1.1.
scope: name scope.
Returns:
boxlist with absolute coordinates in terms of the image size.
"""
with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
# Ensure range of input boxes is correct.
if check_range:
box_maximum = tf.reduce_max(boxlist.get())
max_assert = tf.Assert(
tf.greater_equal(maximum_normalized_coordinate, box_maximum),
['maximum box coordinate value is larger '
'than %f: ' % maximum_normalized_coordinate, box_maximum])
with tf.control_dependencies([max_assert]):
width = tf.identity(width)
return scale(boxlist, height, width)
def refine_boxes_multi_class(pool_boxes,
num_classes,
nms_iou_thresh,
nms_max_detections,
voting_iou_thresh=0.5):
"""Refines a pool of boxes using non max suppression and box voting.
Box refinement is done independently for each class.
Args:
pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
have a rank 1 'scores' field and a rank 1 'classes' field.
num_classes: (int scalar) Number of classes.
nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
nms_max_detections: (int scalar) maximum output size for NMS.
voting_iou_thresh: (float scalar) iou threshold for box voting.
Returns:
BoxList of refined boxes.
Raises:
ValueError: if
a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
b) pool_boxes is not a BoxList.
c) pool_boxes does not have a scores and classes field.
"""
if not 0.0 <= nms_iou_thresh <= 1.0:
raise ValueError('nms_iou_thresh must be between 0 and 1')
if not 0.0 <= voting_iou_thresh <= 1.0:
raise ValueError('voting_iou_thresh must be between 0 and 1')
if not isinstance(pool_boxes, box_list.BoxList):
raise ValueError('pool_boxes must be a BoxList')
if not pool_boxes.has_field('scores'):
raise ValueError('pool_boxes must have a \'scores\' field')
if not pool_boxes.has_field('classes'):
raise ValueError('pool_boxes must have a \'classes\' field')
refined_boxes = []
for i in range(num_classes):
boxes_class = filter_field_value_equals(pool_boxes, 'classes', i)
refined_boxes_class = refine_boxes(boxes_class, nms_iou_thresh,
nms_max_detections, voting_iou_thresh)
refined_boxes.append(refined_boxes_class)
return sort_by_field(concatenate(refined_boxes), 'scores')
def refine_boxes(pool_boxes,
nms_iou_thresh,
nms_max_detections,
voting_iou_thresh=0.5):
"""Refines a pool of boxes using non max suppression and box voting.
Args:
pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
have a rank 1 'scores' field.
nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
nms_max_detections: (int scalar) maximum output size for NMS.
voting_iou_thresh: (float scalar) iou threshold for box voting.
Returns:
BoxList of refined boxes.
Raises:
ValueError: if
a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
b) pool_boxes is not a BoxList.
c) pool_boxes does not have a scores field.
"""
if not 0.0 <= nms_iou_thresh <= 1.0:
raise ValueError('nms_iou_thresh must be between 0 and 1')
if not 0.0 <= voting_iou_thresh <= 1.0:
raise ValueError('voting_iou_thresh must be between 0 and 1')
if not isinstance(pool_boxes, box_list.BoxList):
raise ValueError('pool_boxes must be a BoxList')
if not pool_boxes.has_field('scores'):
raise ValueError('pool_boxes must have a \'scores\' field')
nms_boxes = non_max_suppression(
pool_boxes, nms_iou_thresh, nms_max_detections)
return box_voting(nms_boxes, pool_boxes, voting_iou_thresh)
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
"""Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.
Performs box voting as described in 'Object detection via a multi-region &
semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
with iou overlap >= iou_thresh. The location of B is set to the weighted
average location of boxes in S (scores are used for weighting). And the score
of B is set to the average score of boxes in S.
Args:
selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
boxes are usually selected from pool_boxes using non max suppression.
pool_boxes: BoxList containing a set of (possibly redundant) boxes.
iou_thresh: (float scalar) iou threshold for matching boxes in
selected_boxes and pool_boxes.
Returns:
BoxList containing averaged locations and scores for each box in
selected_boxes.
Raises:
ValueError: if
a) selected_boxes or pool_boxes is not a BoxList.
b) if iou_thresh is not in [0, 1].
c) pool_boxes does not have a scores field.
"""
if not 0.0 <= iou_thresh <= 1.0:
raise ValueError('iou_thresh must be between 0 and 1')
if not isinstance(selected_boxes, box_list.BoxList):
raise ValueError('selected_boxes must be a BoxList')
if not isinstance(pool_boxes, box_list.BoxList):
raise ValueError('pool_boxes must be a BoxList')
if not pool_boxes.has_field('scores'):
raise ValueError('pool_boxes must have a \'scores\' field')
iou_ = iou(selected_boxes, pool_boxes)
match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
num_matches = tf.reduce_sum(match_indicator, 1)
# TODO(kbanoop): Handle the case where some boxes in selected_boxes do not
# match to any boxes in pool_boxes. For such boxes without any matches, we
# should return the original boxes without voting.
match_assert = tf.Assert(
tf.reduce_all(tf.greater(num_matches, 0)),
['Each box in selected_boxes must match with at least one box '
'in pool_boxes.'])
scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
scores_assert = tf.Assert(
tf.reduce_all(tf.greater_equal(scores, 0)),
['Scores must be non negative.'])
with tf.control_dependencies([scores_assert, match_assert]):
sum_scores = tf.matmul(match_indicator, scores)
averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches
box_locations = tf.matmul(match_indicator,
pool_boxes.get() * scores) / sum_scores
averaged_boxes = box_list.BoxList(box_locations)
_copy_extra_fields(averaged_boxes, selected_boxes)
averaged_boxes.add_field('scores', averaged_scores)
return averaged_boxes
def pad_or_clip_box_list(boxlist, num_boxes, scope=None):
"""Pads or clips all fields of a BoxList.
Args:
boxlist: A BoxList with arbitrary of number of boxes.
num_boxes: First num_boxes in boxlist are kept.
The fields are zero-padded if num_boxes is bigger than the
actual number of boxes.
scope: name scope.
Returns:
BoxList with all fields padded or clipped.
"""
with tf.name_scope(scope, 'PadOrClipBoxList'):
subboxlist = box_list.BoxList(shape_utils.pad_or_clip_tensor(
boxlist.get(), num_boxes))
for field in boxlist.get_extra_fields():
subfield = shape_utils.pad_or_clip_tensor(
boxlist.get_field(field), num_boxes)
subboxlist.add_field(field, subfield)
return subboxlist
def select_random_box(boxlist,
default_box=None,
seed=None,
scope=None):
"""Selects a random bounding box from a `BoxList`.
Args:
boxlist: A BoxList.
default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`,
this default box will be returned. If None, will use a default box of
[[-1., -1., -1., -1.]].
seed: Random seed.
scope: Name scope.
Returns:
bbox: A [1, 4] tensor with a random bounding box.
valid: A bool tensor indicating whether a valid bounding box is returned
(True) or whether the default box is returned (False).
"""
with tf.name_scope(scope, 'SelectRandomBox'):
bboxes = boxlist.get()
combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes)
number_of_boxes = combined_shape[0]
default_box = default_box or tf.constant([[-1., -1., -1., -1.]])
def select_box():
random_index = tf.random_uniform([],
maxval=number_of_boxes,
dtype=tf.int32,
seed=seed)
return tf.expand_dims(bboxes[random_index], axis=0), tf.constant(True)
return tf.cond(
tf.greater_equal(number_of_boxes, 1),
true_fn=select_box,
false_fn=lambda: (default_box, tf.constant(False)))
def get_minimal_coverage_box(boxlist,
default_box=None,
scope=None):
"""Creates a single bounding box which covers all boxes in the boxlist.
Args:
boxlist: A Boxlist.
default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`,
this default box will be returned. If None, will use a default box of
[[0., 0., 1., 1.]].
scope: Name scope.
Returns:
A [1, 4] float32 tensor with a bounding box that tightly covers all the
boxes in the box list. If the boxlist does not contain any boxes, the
default box is returned.
"""
with tf.name_scope(scope, 'CreateCoverageBox'):
num_boxes = boxlist.num_boxes()
def coverage_box(bboxes):
y_min, x_min, y_max, x_max = tf.split(
value=bboxes, num_or_size_splits=4, axis=1)
y_min_coverage = tf.reduce_min(y_min, axis=0)
x_min_coverage = tf.reduce_min(x_min, axis=0)
y_max_coverage = tf.reduce_max(y_max, axis=0)
x_max_coverage = tf.reduce_max(x_max, axis=0)
return tf.stack(
[y_min_coverage, x_min_coverage, y_max_coverage, x_max_coverage],
axis=1)
default_box = default_box or tf.constant([[0., 0., 1., 1.]])
return tf.cond(
tf.greater_equal(num_boxes, 1),
true_fn=lambda: coverage_box(boxlist.get()),
false_fn=lambda: default_box)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_list_ops."""
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from object_detection.core import box_list
from object_detection.core import box_list_ops
class BoxListOpsTest(tf.test.TestCase):
"""Tests for common bounding box operations."""
def test_area(self):
corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
exp_output = [200.0, 4.0]
boxes = box_list.BoxList(corners)
areas = box_list_ops.area(boxes)
with self.test_session() as sess:
areas_output = sess.run(areas)
self.assertAllClose(areas_output, exp_output)
def test_height_width(self):
corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
exp_output_heights = [10., 2.]
exp_output_widths = [20., 2.]
boxes = box_list.BoxList(corners)
heights, widths = box_list_ops.height_width(boxes)
with self.test_session() as sess:
output_heights, output_widths = sess.run([heights, widths])
self.assertAllClose(output_heights, exp_output_heights)
self.assertAllClose(output_widths, exp_output_widths)
def test_scale(self):
corners = tf.constant([[0, 0, 100, 200], [50, 120, 100, 140]],
dtype=tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('extra_data', tf.constant([[1], [2]]))
y_scale = tf.constant(1.0/100)
x_scale = tf.constant(1.0/200)
scaled_boxes = box_list_ops.scale(boxes, y_scale, x_scale)
exp_output = [[0, 0, 1, 1], [0.5, 0.6, 1.0, 0.7]]
with self.test_session() as sess:
scaled_corners_out = sess.run(scaled_boxes.get())
self.assertAllClose(scaled_corners_out, exp_output)
extra_data_out = sess.run(scaled_boxes.get_field('extra_data'))
self.assertAllEqual(extra_data_out, [[1], [2]])
def test_clip_to_window_filter_boxes_which_fall_outside_the_window(
self):
window = tf.constant([0, 0, 9, 14], tf.float32)
corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
[-1.0, -2.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0],
[-100.0, -100.0, 300.0, 600.0],
[-10.0, -10.0, -9.0, -9.0]])
boxes = box_list.BoxList(corners)
boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
[0.0, 0.0, 9.0, 14.0]]
pruned = box_list_ops.clip_to_window(
boxes, window, filter_nonoverlapping=True)
with self.test_session() as sess:
pruned_output = sess.run(pruned.get())
self.assertAllClose(pruned_output, exp_output)
extra_data_out = sess.run(pruned.get_field('extra_data'))
self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5]])
def test_clip_to_window_without_filtering_boxes_which_fall_outside_the_window(
self):
window = tf.constant([0, 0, 9, 14], tf.float32)
corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
[-1.0, -2.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0],
[-100.0, -100.0, 300.0, 600.0],
[-10.0, -10.0, -9.0, -9.0]])
boxes = box_list.BoxList(corners)
boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
[0.0, 0.0, 9.0, 14.0], [0.0, 0.0, 0.0, 0.0]]
pruned = box_list_ops.clip_to_window(
boxes, window, filter_nonoverlapping=False)
with self.test_session() as sess:
pruned_output = sess.run(pruned.get())
self.assertAllClose(pruned_output, exp_output)
extra_data_out = sess.run(pruned.get_field('extra_data'))
self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5], [6]])
def test_prune_outside_window_filters_boxes_which_fall_outside_the_window(
self):
window = tf.constant([0, 0, 9, 14], tf.float32)
corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
[-1.0, -2.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0],
[-10.0, -10.0, -9.0, -9.0],
[-100.0, -100.0, 300.0, 600.0]])
boxes = box_list.BoxList(corners)
boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
exp_output = [[5.0, 5.0, 6.0, 6.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0]]
pruned, keep_indices = box_list_ops.prune_outside_window(boxes, window)
with self.test_session() as sess:
pruned_output = sess.run(pruned.get())
self.assertAllClose(pruned_output, exp_output)
keep_indices_out = sess.run(keep_indices)
self.assertAllEqual(keep_indices_out, [0, 2, 3])
extra_data_out = sess.run(pruned.get_field('extra_data'))
self.assertAllEqual(extra_data_out, [[1], [3], [4]])
def test_prune_completely_outside_window(self):
window = tf.constant([0, 0, 9, 14], tf.float32)
corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
[-1.0, -2.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0],
[-10.0, -10.0, -9.0, -9.0],
[-100.0, -100.0, 300.0, 600.0]])
boxes = box_list.BoxList(corners)
boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
exp_output = [[5.0, 5.0, 6.0, 6.0],
[-1.0, -2.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0],
[-100.0, -100.0, 300.0, 600.0]]
pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes,
window)
with self.test_session() as sess:
pruned_output = sess.run(pruned.get())
self.assertAllClose(pruned_output, exp_output)
keep_indices_out = sess.run(keep_indices)
self.assertAllEqual(keep_indices_out, [0, 1, 2, 3, 5])
extra_data_out = sess.run(pruned.get_field('extra_data'))
self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [6]])
def test_prune_completely_outside_window_with_empty_boxlist(self):
window = tf.constant([0, 0, 9, 14], tf.float32)
corners = tf.zeros(shape=[0, 4], dtype=tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('extra_data', tf.zeros(shape=[0], dtype=tf.int32))
pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes,
window)
pruned_boxes = pruned.get()
extra = pruned.get_field('extra_data')
exp_pruned_boxes = np.zeros(shape=[0, 4], dtype=np.float32)
exp_extra = np.zeros(shape=[0], dtype=np.int32)
with self.test_session() as sess:
pruned_boxes_out, keep_indices_out, extra_out = sess.run(
[pruned_boxes, keep_indices, extra])
self.assertAllClose(exp_pruned_boxes, pruned_boxes_out)
self.assertAllEqual([], keep_indices_out)
self.assertAllEqual(exp_extra, extra_out)
def test_intersection(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output = [[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
intersect = box_list_ops.intersection(boxes1, boxes2)
with self.test_session() as sess:
intersect_output = sess.run(intersect)
self.assertAllClose(intersect_output, exp_output)
def test_matched_intersection(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]])
exp_output = [2.0, 0.0]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
intersect = box_list_ops.matched_intersection(boxes1, boxes2)
with self.test_session() as sess:
intersect_output = sess.run(intersect)
self.assertAllClose(intersect_output, exp_output)
def test_iou(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
iou = box_list_ops.iou(boxes1, boxes2)
with self.test_session() as sess:
iou_output = sess.run(iou)
self.assertAllClose(iou_output, exp_output)
def test_matched_iou(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]])
exp_output = [2.0 / 16.0, 0]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
iou = box_list_ops.matched_iou(boxes1, boxes2)
with self.test_session() as sess:
iou_output = sess.run(iou)
self.assertAllClose(iou_output, exp_output)
def test_iouworks_on_empty_inputs(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
boxes_empty = box_list.BoxList(tf.zeros((0, 4)))
iou_empty_1 = box_list_ops.iou(boxes1, boxes_empty)
iou_empty_2 = box_list_ops.iou(boxes_empty, boxes2)
iou_empty_3 = box_list_ops.iou(boxes_empty, boxes_empty)
with self.test_session() as sess:
iou_output_1, iou_output_2, iou_output_3 = sess.run(
[iou_empty_1, iou_empty_2, iou_empty_3])
self.assertAllEqual(iou_output_1.shape, (2, 0))
self.assertAllEqual(iou_output_2.shape, (0, 3))
self.assertAllEqual(iou_output_3.shape, (0, 0))
def test_ioa(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
[1.0 / 12.0, 0.0, 5.0 / 400.0]]
exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
[0, 0],
[6.0 / 6.0, 5.0 / 5.0]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
ioa_1 = box_list_ops.ioa(boxes1, boxes2)
ioa_2 = box_list_ops.ioa(boxes2, boxes1)
with self.test_session() as sess:
ioa_output_1, ioa_output_2 = sess.run([ioa_1, ioa_2])
self.assertAllClose(ioa_output_1, exp_output_1)
self.assertAllClose(ioa_output_2, exp_output_2)
def test_prune_non_overlapping_boxes(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
minoverlap = 0.5
exp_output_1 = boxes1
exp_output_2 = box_list.BoxList(tf.constant(0.0, shape=[0, 4]))
output_1, keep_indices_1 = box_list_ops.prune_non_overlapping_boxes(
boxes1, boxes2, min_overlap=minoverlap)
output_2, keep_indices_2 = box_list_ops.prune_non_overlapping_boxes(
boxes2, boxes1, min_overlap=minoverlap)
with self.test_session() as sess:
(output_1_, keep_indices_1_, output_2_, keep_indices_2_, exp_output_1_,
exp_output_2_) = sess.run(
[output_1.get(), keep_indices_1,
output_2.get(), keep_indices_2,
exp_output_1.get(), exp_output_2.get()])
self.assertAllClose(output_1_, exp_output_1_)
self.assertAllClose(output_2_, exp_output_2_)
self.assertAllEqual(keep_indices_1_, [0, 1])
self.assertAllEqual(keep_indices_2_, [])
def test_prune_small_boxes(self):
boxes = tf.constant([[4.0, 3.0, 7.0, 5.0],
[5.0, 6.0, 10.0, 7.0],
[3.0, 4.0, 6.0, 8.0],
[14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_boxes = [[3.0, 4.0, 6.0, 8.0],
[0.0, 0.0, 20.0, 20.0]]
boxes = box_list.BoxList(boxes)
pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3)
with self.test_session() as sess:
pruned_boxes = sess.run(pruned_boxes.get())
self.assertAllEqual(pruned_boxes, exp_boxes)
def test_prune_small_boxes_prunes_boxes_with_negative_side(self):
boxes = tf.constant([[4.0, 3.0, 7.0, 5.0],
[5.0, 6.0, 10.0, 7.0],
[3.0, 4.0, 6.0, 8.0],
[14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0],
[2.0, 3.0, 1.5, 7.0], # negative height
[2.0, 3.0, 5.0, 1.7]]) # negative width
exp_boxes = [[3.0, 4.0, 6.0, 8.0],
[0.0, 0.0, 20.0, 20.0]]
boxes = box_list.BoxList(boxes)
pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3)
with self.test_session() as sess:
pruned_boxes = sess.run(pruned_boxes.get())
self.assertAllEqual(pruned_boxes, exp_boxes)
def test_change_coordinate_frame(self):
corners = tf.constant([[0.25, 0.5, 0.75, 0.75], [0.5, 0.0, 1.0, 1.0]])
window = tf.constant([0.25, 0.25, 0.75, 0.75])
boxes = box_list.BoxList(corners)
expected_corners = tf.constant([[0, 0.5, 1.0, 1.0], [0.5, -0.5, 1.5, 1.5]])
expected_boxes = box_list.BoxList(expected_corners)
output = box_list_ops.change_coordinate_frame(boxes, window)
with self.test_session() as sess:
output_, expected_boxes_ = sess.run([output.get(), expected_boxes.get()])
self.assertAllClose(output_, expected_boxes_)
def test_ioaworks_on_empty_inputs(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
boxes_empty = box_list.BoxList(tf.zeros((0, 4)))
ioa_empty_1 = box_list_ops.ioa(boxes1, boxes_empty)
ioa_empty_2 = box_list_ops.ioa(boxes_empty, boxes2)
ioa_empty_3 = box_list_ops.ioa(boxes_empty, boxes_empty)
with self.test_session() as sess:
ioa_output_1, ioa_output_2, ioa_output_3 = sess.run(
[ioa_empty_1, ioa_empty_2, ioa_empty_3])
self.assertAllEqual(ioa_output_1.shape, (2, 0))
self.assertAllEqual(ioa_output_2.shape, (0, 3))
self.assertAllEqual(ioa_output_3.shape, (0, 0))
def test_pairwise_distances(self):
corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
[1.0, 1.0, 0.0, 2.0]])
corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
[-4.0, 0.0, 0.0, 3.0],
[0.0, 0.0, 0.0, 0.0]])
exp_output = [[26, 25, 0], [18, 27, 6]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
dist_matrix = box_list_ops.sq_dist(boxes1, boxes2)
with self.test_session() as sess:
dist_output = sess.run(dist_matrix)
self.assertAllClose(dist_output, exp_output)
def test_boolean_mask(self):
corners = tf.constant(
[4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
indicator = tf.constant([True, False, True, False, True], tf.bool)
expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
boxes = box_list.BoxList(corners)
subset = box_list_ops.boolean_mask(boxes, indicator)
with self.test_session() as sess:
subset_output = sess.run(subset.get())
self.assertAllClose(subset_output, expected_subset)
def test_boolean_mask_with_field(self):
corners = tf.constant(
[4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
indicator = tf.constant([True, False, True, False, True], tf.bool)
weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
expected_weights = [[.1], [.5], [.9]]
boxes = box_list.BoxList(corners)
boxes.add_field('weights', weights)
subset = box_list_ops.boolean_mask(boxes, indicator, ['weights'])
with self.test_session() as sess:
subset_output, weights_output = sess.run(
[subset.get(), subset.get_field('weights')])
self.assertAllClose(subset_output, expected_subset)
self.assertAllClose(weights_output, expected_weights)
def test_gather(self):
corners = tf.constant(
[4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
indices = tf.constant([0, 2, 4], tf.int32)
expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
boxes = box_list.BoxList(corners)
subset = box_list_ops.gather(boxes, indices)
with self.test_session() as sess:
subset_output = sess.run(subset.get())
self.assertAllClose(subset_output, expected_subset)
def test_gather_with_field(self):
corners = tf.constant([4*[0.0], 4*[1.0], 4*[2.0], 4*[3.0], 4*[4.0]])
indices = tf.constant([0, 2, 4], tf.int32)
weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
expected_weights = [[.1], [.5], [.9]]
boxes = box_list.BoxList(corners)
boxes.add_field('weights', weights)
subset = box_list_ops.gather(boxes, indices, ['weights'])
with self.test_session() as sess:
subset_output, weights_output = sess.run(
[subset.get(), subset.get_field('weights')])
self.assertAllClose(subset_output, expected_subset)
self.assertAllClose(weights_output, expected_weights)
def test_gather_with_invalid_field(self):
corners = tf.constant([4 * [0.0], 4 * [1.0]])
indices = tf.constant([0, 1], tf.int32)
weights = tf.constant([[.1], [.3]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('weights', weights)
with self.assertRaises(ValueError):
box_list_ops.gather(boxes, indices, ['foo', 'bar'])
def test_gather_with_invalid_inputs(self):
corners = tf.constant(
[4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
indices_float32 = tf.constant([0, 2, 4], tf.float32)
boxes = box_list.BoxList(corners)
with self.assertRaises(ValueError):
_ = box_list_ops.gather(boxes, indices_float32)
indices_2d = tf.constant([[0, 2, 4]], tf.int32)
boxes = box_list.BoxList(corners)
with self.assertRaises(ValueError):
_ = box_list_ops.gather(boxes, indices_2d)
def test_gather_with_dynamic_indexing(self):
corners = tf.constant([4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]
])
weights = tf.constant([.5, .3, .7, .1, .9], tf.float32)
indices = tf.reshape(tf.where(tf.greater(weights, 0.4)), [-1])
expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
expected_weights = [.5, .7, .9]
boxes = box_list.BoxList(corners)
boxes.add_field('weights', weights)
subset = box_list_ops.gather(boxes, indices, ['weights'])
with self.test_session() as sess:
subset_output, weights_output = sess.run([subset.get(), subset.get_field(
'weights')])
self.assertAllClose(subset_output, expected_subset)
self.assertAllClose(weights_output, expected_weights)
def test_sort_by_field_ascending_order(self):
exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
[0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
exp_scores = [.95, .9, .75, .6, .5, .3]
exp_weights = [.2, .45, .6, .75, .8, .92]
shuffle = [2, 4, 0, 5, 1, 3]
corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant(
[exp_scores[i] for i in shuffle], tf.float32))
boxes.add_field('weights', tf.constant(
[exp_weights[i] for i in shuffle], tf.float32))
sort_by_weight = box_list_ops.sort_by_field(
boxes,
'weights',
order=box_list_ops.SortOrder.ascend)
with self.test_session() as sess:
corners_out, scores_out, weights_out = sess.run([
sort_by_weight.get(),
sort_by_weight.get_field('scores'),
sort_by_weight.get_field('weights')])
self.assertAllClose(corners_out, exp_corners)
self.assertAllClose(scores_out, exp_scores)
self.assertAllClose(weights_out, exp_weights)
def test_sort_by_field_descending_order(self):
exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
[0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
exp_scores = [.95, .9, .75, .6, .5, .3]
exp_weights = [.2, .45, .6, .75, .8, .92]
shuffle = [2, 4, 0, 5, 1, 3]
corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant(
[exp_scores[i] for i in shuffle], tf.float32))
boxes.add_field('weights', tf.constant(
[exp_weights[i] for i in shuffle], tf.float32))
sort_by_score = box_list_ops.sort_by_field(boxes, 'scores')
with self.test_session() as sess:
corners_out, scores_out, weights_out = sess.run([sort_by_score.get(
), sort_by_score.get_field('scores'), sort_by_score.get_field('weights')])
self.assertAllClose(corners_out, exp_corners)
self.assertAllClose(scores_out, exp_scores)
self.assertAllClose(weights_out, exp_weights)
def test_sort_by_field_invalid_inputs(self):
corners = tf.constant([4 * [0.0], 4 * [0.5], 4 * [1.0], 4 * [2.0], 4 *
[3.0], 4 * [4.0]])
misc = tf.constant([[.95, .9], [.5, .3]], tf.float32)
weights = tf.constant([.1, .2], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('misc', misc)
boxes.add_field('weights', weights)
with self.test_session() as sess:
with self.assertRaises(ValueError):
box_list_ops.sort_by_field(boxes, 'area')
with self.assertRaises(ValueError):
box_list_ops.sort_by_field(boxes, 'misc')
if ops._USE_C_API:
with self.assertRaises(ValueError):
box_list_ops.sort_by_field(boxes, 'weights')
else:
with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
'Incorrect field size'):
sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
def test_visualize_boxes_in_image(self):
image = tf.zeros((6, 4, 3))
corners = tf.constant([[0, 0, 5, 3],
[0, 0, 3, 2]], tf.float32)
boxes = box_list.BoxList(corners)
image_and_boxes = box_list_ops.visualize_boxes_in_image(image, boxes)
image_and_boxes_bw = tf.to_float(
tf.greater(tf.reduce_sum(image_and_boxes, 2), 0.0))
exp_result = [[1, 1, 1, 0],
[1, 1, 1, 0],
[1, 1, 1, 0],
[1, 0, 1, 0],
[1, 1, 1, 0],
[0, 0, 0, 0]]
with self.test_session() as sess:
output = sess.run(image_and_boxes_bw)
self.assertAllEqual(output.astype(int), exp_result)
def test_filter_field_value_equals(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('classes', tf.constant([1, 2, 1, 2, 2, 1]))
exp_output1 = [[0, 0, 1, 1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
exp_output2 = [[0, 0.1, 1, 1.1], [0, 10, 1, 11], [0, 10.1, 1, 11.1]]
filtered_boxes1 = box_list_ops.filter_field_value_equals(
boxes, 'classes', 1)
filtered_boxes2 = box_list_ops.filter_field_value_equals(
boxes, 'classes', 2)
with self.test_session() as sess:
filtered_output1, filtered_output2 = sess.run([filtered_boxes1.get(),
filtered_boxes2.get()])
self.assertAllClose(filtered_output1, exp_output1)
self.assertAllClose(filtered_output2, exp_output2)
def test_filter_greater_than(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.1, .75, .9, .5, .5, .8]))
thresh = .6
exp_output = [[0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
filtered_boxes = box_list_ops.filter_greater_than(boxes, thresh)
with self.test_session() as sess:
filtered_output = sess.run(filtered_boxes.get())
self.assertAllClose(filtered_output, exp_output)
def test_clip_box_list(self):
boxlist = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
[0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
boxlist.add_field('classes', tf.constant([0, 0, 1, 1]))
boxlist.add_field('scores', tf.constant([0.75, 0.65, 0.3, 0.2]))
num_boxes = 2
clipped_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
expected_classes = [0, 0]
expected_scores = [0.75, 0.65]
with self.test_session() as sess:
boxes_out, classes_out, scores_out = sess.run(
[clipped_boxlist.get(), clipped_boxlist.get_field('classes'),
clipped_boxlist.get_field('scores')])
self.assertAllClose(expected_boxes, boxes_out)
self.assertAllEqual(expected_classes, classes_out)
self.assertAllClose(expected_scores, scores_out)
def test_pad_box_list(self):
boxlist = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
boxlist.add_field('classes', tf.constant([0, 1]))
boxlist.add_field('scores', tf.constant([0.75, 0.2]))
num_boxes = 4
padded_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
[0, 0, 0, 0], [0, 0, 0, 0]]
expected_classes = [0, 1, 0, 0]
expected_scores = [0.75, 0.2, 0, 0]
with self.test_session() as sess:
boxes_out, classes_out, scores_out = sess.run(
[padded_boxlist.get(), padded_boxlist.get_field('classes'),
padded_boxlist.get_field('scores')])
self.assertAllClose(expected_boxes, boxes_out)
self.assertAllEqual(expected_classes, classes_out)
self.assertAllClose(expected_scores, scores_out)
def test_select_random_box(self):
boxes = [[0., 0., 1., 1.],
[0., 1., 2., 3.],
[0., 2., 3., 4.]]
corners = tf.constant(boxes, dtype=tf.float32)
boxlist = box_list.BoxList(corners)
random_bbox, valid = box_list_ops.select_random_box(boxlist)
with self.test_session() as sess:
random_bbox_out, valid_out = sess.run([random_bbox, valid])
norm_small = any(
[np.linalg.norm(random_bbox_out - box) < 1e-6 for box in boxes])
self.assertTrue(norm_small)
self.assertTrue(valid_out)
def test_select_random_box_with_empty_boxlist(self):
corners = tf.constant([], shape=[0, 4], dtype=tf.float32)
boxlist = box_list.BoxList(corners)
random_bbox, valid = box_list_ops.select_random_box(boxlist)
with self.test_session() as sess:
random_bbox_out, valid_out = sess.run([random_bbox, valid])
expected_bbox_out = np.array([[-1., -1., -1., -1.]], dtype=np.float32)
self.assertAllEqual(expected_bbox_out, random_bbox_out)
self.assertFalse(valid_out)
def test_get_minimal_coverage_box(self):
boxes = [[0., 0., 1., 1.],
[-1., 1., 2., 3.],
[0., 2., 3., 4.]]
expected_coverage_box = [[-1., 0., 3., 4.]]
corners = tf.constant(boxes, dtype=tf.float32)
boxlist = box_list.BoxList(corners)
coverage_box = box_list_ops.get_minimal_coverage_box(boxlist)
with self.test_session() as sess:
coverage_box_out = sess.run(coverage_box)
self.assertAllClose(expected_coverage_box, coverage_box_out)
def test_get_minimal_coverage_box_with_empty_boxlist(self):
corners = tf.constant([], shape=[0, 4], dtype=tf.float32)
boxlist = box_list.BoxList(corners)
coverage_box = box_list_ops.get_minimal_coverage_box(boxlist)
with self.test_session() as sess:
coverage_box_out = sess.run(coverage_box)
self.assertAllClose([[0.0, 0.0, 1.0, 1.0]], coverage_box_out)
class ConcatenateTest(tf.test.TestCase):
def test_invalid_input_box_list_list(self):
with self.assertRaises(ValueError):
box_list_ops.concatenate(None)
with self.assertRaises(ValueError):
box_list_ops.concatenate([])
with self.assertRaises(ValueError):
corners = tf.constant([[0, 0, 0, 0]], tf.float32)
boxlist = box_list.BoxList(corners)
box_list_ops.concatenate([boxlist, 2])
def test_concatenate_with_missing_fields(self):
corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
scores1 = tf.constant([1.0, 2.1])
corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
boxlist1 = box_list.BoxList(corners1)
boxlist1.add_field('scores', scores1)
boxlist2 = box_list.BoxList(corners2)
with self.assertRaises(ValueError):
box_list_ops.concatenate([boxlist1, boxlist2])
def test_concatenate_with_incompatible_field_shapes(self):
corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
scores1 = tf.constant([1.0, 2.1])
corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
scores2 = tf.constant([[1.0, 1.0], [2.1, 3.2]])
boxlist1 = box_list.BoxList(corners1)
boxlist1.add_field('scores', scores1)
boxlist2 = box_list.BoxList(corners2)
boxlist2.add_field('scores', scores2)
with self.assertRaises(ValueError):
box_list_ops.concatenate([boxlist1, boxlist2])
def test_concatenate_is_correct(self):
corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
scores1 = tf.constant([1.0, 2.1])
corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]],
tf.float32)
scores2 = tf.constant([1.0, 2.1, 5.6])
exp_corners = [[0, 0, 0, 0],
[1, 2, 3, 4],
[0, 3, 1, 6],
[2, 4, 3, 8],
[1, 0, 5, 10]]
exp_scores = [1.0, 2.1, 1.0, 2.1, 5.6]
boxlist1 = box_list.BoxList(corners1)
boxlist1.add_field('scores', scores1)
boxlist2 = box_list.BoxList(corners2)
boxlist2.add_field('scores', scores2)
result = box_list_ops.concatenate([boxlist1, boxlist2])
with self.test_session() as sess:
corners_output, scores_output = sess.run(
[result.get(), result.get_field('scores')])
self.assertAllClose(corners_output, exp_corners)
self.assertAllClose(scores_output, exp_scores)
class NonMaxSuppressionTest(tf.test.TestCase):
def test_select_from_three_clusters(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
iou_thresh = .5
max_output_size = 3
exp_nms = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 100, 1, 101]]
nms = box_list_ops.non_max_suppression(
boxes, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms.get())
self.assertAllClose(nms_output, exp_nms)
def test_select_at_most_two_boxes_from_three_clusters(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
iou_thresh = .5
max_output_size = 2
exp_nms = [[0, 10, 1, 11],
[0, 0, 1, 1]]
nms = box_list_ops.non_max_suppression(
boxes, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms.get())
self.assertAllClose(nms_output, exp_nms)
def test_select_at_most_thirty_boxes_from_three_clusters(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
iou_thresh = .5
max_output_size = 30
exp_nms = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 100, 1, 101]]
nms = box_list_ops.non_max_suppression(
boxes, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms.get())
self.assertAllClose(nms_output, exp_nms)
def test_select_single_box(self):
corners = tf.constant([[0, 0, 1, 1]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.9]))
iou_thresh = .5
max_output_size = 3
exp_nms = [[0, 0, 1, 1]]
nms = box_list_ops.non_max_suppression(
boxes, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms.get())
self.assertAllClose(nms_output, exp_nms)
def test_select_from_ten_identical_boxes(self):
corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant(10 * [.9]))
iou_thresh = .5
max_output_size = 3
exp_nms = [[0, 0, 1, 1]]
nms = box_list_ops.non_max_suppression(
boxes, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms.get())
self.assertAllClose(nms_output, exp_nms)
def test_copy_extra_fields(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1]], tf.float32)
boxes = box_list.BoxList(corners)
tensor1 = np.array([[1], [4]])
tensor2 = np.array([[1, 1], [2, 2]])
boxes.add_field('tensor1', tf.constant(tensor1))
boxes.add_field('tensor2', tf.constant(tensor2))
new_boxes = box_list.BoxList(tf.constant([[0, 0, 10, 10],
[1, 3, 5, 5]], tf.float32))
new_boxes = box_list_ops._copy_extra_fields(new_boxes, boxes)
with self.test_session() as sess:
self.assertAllClose(tensor1, sess.run(new_boxes.get_field('tensor1')))
self.assertAllClose(tensor2, sess.run(new_boxes.get_field('tensor2')))
class CoordinatesConversionTest(tf.test.TestCase):
def test_to_normalized_coordinates(self):
coordinates = tf.constant([[0, 0, 100, 100],
[25, 25, 75, 75]], tf.float32)
img = tf.ones((128, 100, 100, 3))
boxlist = box_list.BoxList(coordinates)
normalized_boxlist = box_list_ops.to_normalized_coordinates(
boxlist, tf.shape(img)[1], tf.shape(img)[2])
expected_boxes = [[0, 0, 1, 1],
[0.25, 0.25, 0.75, 0.75]]
with self.test_session() as sess:
normalized_boxes = sess.run(normalized_boxlist.get())
self.assertAllClose(normalized_boxes, expected_boxes)
def test_to_normalized_coordinates_already_normalized(self):
coordinates = tf.constant([[0, 0, 1, 1],
[0.25, 0.25, 0.75, 0.75]], tf.float32)
img = tf.ones((128, 100, 100, 3))
boxlist = box_list.BoxList(coordinates)
normalized_boxlist = box_list_ops.to_normalized_coordinates(
boxlist, tf.shape(img)[1], tf.shape(img)[2])
with self.test_session() as sess:
with self.assertRaisesOpError('assertion failed'):
sess.run(normalized_boxlist.get())
def test_to_absolute_coordinates(self):
coordinates = tf.constant([[0, 0, 1, 1],
[0.25, 0.25, 0.75, 0.75]], tf.float32)
img = tf.ones((128, 100, 100, 3))
boxlist = box_list.BoxList(coordinates)
absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
expected_boxes = [[0, 0, 100, 100],
[25, 25, 75, 75]]
with self.test_session() as sess:
absolute_boxes = sess.run(absolute_boxlist.get())
self.assertAllClose(absolute_boxes, expected_boxes)
def test_to_absolute_coordinates_already_abolute(self):
coordinates = tf.constant([[0, 0, 100, 100],
[25, 25, 75, 75]], tf.float32)
img = tf.ones((128, 100, 100, 3))
boxlist = box_list.BoxList(coordinates)
absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
with self.test_session() as sess:
with self.assertRaisesOpError('assertion failed'):
sess.run(absolute_boxlist.get())
def test_convert_to_normalized_and_back(self):
coordinates = np.random.uniform(size=(100, 4))
coordinates = np.round(np.sort(coordinates) * 200)
coordinates[:, 2:4] += 1
coordinates[99, :] = [0, 0, 201, 201]
img = tf.ones((128, 202, 202, 3))
boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
boxlist = box_list_ops.to_normalized_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
boxlist = box_list_ops.to_absolute_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
with self.test_session() as sess:
out = sess.run(boxlist.get())
self.assertAllClose(out, coordinates)
def test_convert_to_absolute_and_back(self):
coordinates = np.random.uniform(size=(100, 4))
coordinates = np.sort(coordinates)
coordinates[99, :] = [0, 0, 1, 1]
img = tf.ones((128, 202, 202, 3))
boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
boxlist = box_list_ops.to_absolute_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
boxlist = box_list_ops.to_normalized_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
with self.test_session() as sess:
out = sess.run(boxlist.get())
self.assertAllClose(out, coordinates)
def test_to_absolute_coordinates_maximum_coordinate_check(self):
coordinates = tf.constant([[0, 0, 1.2, 1.2],
[0.25, 0.25, 0.75, 0.75]], tf.float32)
img = tf.ones((128, 100, 100, 3))
boxlist = box_list.BoxList(coordinates)
absolute_boxlist = box_list_ops.to_absolute_coordinates(
boxlist,
tf.shape(img)[1],
tf.shape(img)[2],
maximum_normalized_coordinate=1.1)
with self.test_session() as sess:
with self.assertRaisesOpError('assertion failed'):
sess.run(absolute_boxlist.get())
class BoxRefinementTest(tf.test.TestCase):
def test_box_voting(self):
candidates = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.8, 0.8]], tf.float32))
candidates.add_field('ExtraField', tf.constant([1, 2]))
pool = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
[0.6, 0.6, 0.8, 0.8]], tf.float32))
pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
averaged_boxes = box_list_ops.box_voting(candidates, pool)
expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
expected_scores = [0.5, 0.3]
with self.test_session() as sess:
boxes_out, scores_out, extra_field_out = sess.run(
[averaged_boxes.get(), averaged_boxes.get_field('scores'),
averaged_boxes.get_field('ExtraField')])
self.assertAllClose(expected_boxes, boxes_out)
self.assertAllClose(expected_scores, scores_out)
self.assertAllEqual(extra_field_out, [1, 2])
def test_box_voting_fails_with_negative_scores(self):
candidates = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
pool = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
pool.add_field('scores', tf.constant([-0.2]))
averaged_boxes = box_list_ops.box_voting(candidates, pool)
with self.test_session() as sess:
with self.assertRaisesOpError('Scores must be non negative'):
sess.run([averaged_boxes.get()])
def test_box_voting_fails_when_unmatched(self):
candidates = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
pool = box_list.BoxList(tf.constant([[0.6, 0.6, 0.8, 0.8]], tf.float32))
pool.add_field('scores', tf.constant([0.2]))
averaged_boxes = box_list_ops.box_voting(candidates, pool)
with self.test_session() as sess:
with self.assertRaisesOpError('Each box in selected_boxes must match '
'with at least one box in pool_boxes.'):
sess.run([averaged_boxes.get()])
def test_refine_boxes(self):
pool = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
[0.6, 0.6, 0.8, 0.8]], tf.float32))
pool.add_field('ExtraField', tf.constant([1, 2, 3]))
pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
refined_boxes = box_list_ops.refine_boxes(pool, 0.5, 10)
expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
expected_scores = [0.5, 0.3]
with self.test_session() as sess:
boxes_out, scores_out, extra_field_out = sess.run(
[refined_boxes.get(), refined_boxes.get_field('scores'),
refined_boxes.get_field('ExtraField')])
self.assertAllClose(expected_boxes, boxes_out)
self.assertAllClose(expected_scores, scores_out)
self.assertAllEqual(extra_field_out, [1, 3])
def test_refine_boxes_multi_class(self):
pool = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
[0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
pool.add_field('classes', tf.constant([0, 0, 1, 1]))
pool.add_field('scores', tf.constant([0.75, 0.25, 0.3, 0.2]))
refined_boxes = box_list_ops.refine_boxes_multi_class(pool, 3, 0.5, 10)
expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8],
[0.2, 0.2, 0.3, 0.3]]
expected_scores = [0.5, 0.3, 0.2]
with self.test_session() as sess:
boxes_out, scores_out, extra_field_out = sess.run(
[refined_boxes.get(), refined_boxes.get_field('scores'),
refined_boxes.get_field('classes')])
self.assertAllClose(expected_boxes, boxes_out)
self.assertAllClose(expected_scores, scores_out)
self.assertAllEqual(extra_field_out, [0, 1, 1])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_list."""
import tensorflow as tf
from object_detection.core import box_list
class BoxListTest(tf.test.TestCase):
"""Tests for BoxList class."""
def test_num_boxes(self):
data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
expected_num_boxes = 3
boxes = box_list.BoxList(data)
with self.test_session() as sess:
num_boxes_output = sess.run(boxes.num_boxes())
self.assertEquals(num_boxes_output, expected_num_boxes)
def test_get_correct_center_coordinates_and_sizes(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
boxes = box_list.BoxList(tf.constant(boxes))
centers_sizes = boxes.get_center_coordinates_and_sizes()
expected_centers_sizes = [[15, 0.35], [12.5, 0.25], [10, 0.3], [5, 0.3]]
with self.test_session() as sess:
centers_sizes_out = sess.run(centers_sizes)
self.assertAllClose(centers_sizes_out, expected_centers_sizes)
def test_create_box_list_with_dynamic_shape(self):
data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
indices = tf.reshape(tf.where(tf.greater([1, 0, 1], 0)), [-1])
data = tf.gather(data, indices)
assert data.get_shape().as_list() == [None, 4]
expected_num_boxes = 2
boxes = box_list.BoxList(data)
with self.test_session() as sess:
num_boxes_output = sess.run(boxes.num_boxes())
self.assertEquals(num_boxes_output, expected_num_boxes)
def test_transpose_coordinates(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.transpose_coordinates()
expected_corners = [[10.0, 10.0, 15.0, 20.0], [0.1, 0.2, 0.4, 0.5]]
with self.test_session() as sess:
corners_out = sess.run(boxes.get())
self.assertAllClose(corners_out, expected_corners)
def test_box_list_invalid_inputs(self):
data0 = tf.constant([[[0, 0, 1, 1], [3, 4, 5, 5]]], tf.float32)
data1 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.float32)
data2 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.int32)
with self.assertRaises(ValueError):
_ = box_list.BoxList(data0)
with self.assertRaises(ValueError):
_ = box_list.BoxList(data1)
with self.assertRaises(ValueError):
_ = box_list.BoxList(data2)
def test_num_boxes_static(self):
box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
boxes = box_list.BoxList(tf.constant(box_corners))
self.assertEquals(boxes.num_boxes_static(), 2)
self.assertEquals(type(boxes.num_boxes_static()), int)
def test_num_boxes_static_for_uninferrable_shape(self):
placeholder = tf.placeholder(tf.float32, shape=[None, 4])
boxes = box_list.BoxList(placeholder)
self.assertEquals(boxes.num_boxes_static(), None)
def test_as_tensor_dict(self):
boxlist = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
boxlist.add_field('classes', tf.constant([0, 1]))
boxlist.add_field('scores', tf.constant([0.75, 0.2]))
tensor_dict = boxlist.as_tensor_dict()
expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
expected_classes = [0, 1]
expected_scores = [0.75, 0.2]
with self.test_session() as sess:
tensor_dict_out = sess.run(tensor_dict)
self.assertAllEqual(3, len(tensor_dict_out))
self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
self.assertAllClose(expected_scores, tensor_dict_out['scores'])
def test_as_tensor_dict_with_features(self):
boxlist = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
boxlist.add_field('classes', tf.constant([0, 1]))
boxlist.add_field('scores', tf.constant([0.75, 0.2]))
tensor_dict = boxlist.as_tensor_dict(['boxes', 'classes', 'scores'])
expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
expected_classes = [0, 1]
expected_scores = [0.75, 0.2]
with self.test_session() as sess:
tensor_dict_out = sess.run(tensor_dict)
self.assertAllEqual(3, len(tensor_dict_out))
self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
self.assertAllClose(expected_scores, tensor_dict_out['scores'])
def test_as_tensor_dict_missing_field(self):
boxlist = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
boxlist.add_field('classes', tf.constant([0, 1]))
boxlist.add_field('scores', tf.constant([0.75, 0.2]))
with self.assertRaises(ValueError):
boxlist.as_tensor_dict(['foo', 'bar'])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Box predictor for object detectors.
Box predictors are classes that take a high level
image feature map as input and produce two predictions,
(1) a tensor encoding box locations, and
(2) a tensor encoding classes for each box.
These components are passed directly to loss functions
in our detection models.
These modules are separated from the main model since the same
few box predictor architectures are shared across many models.
"""
from abc import abstractmethod
import math
import tensorflow as tf
from object_detection.utils import ops
from object_detection.utils import shape_utils
from object_detection.utils import static_shape
slim = tf.contrib.slim
BOX_ENCODINGS = 'box_encodings'
CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background'
MASK_PREDICTIONS = 'mask_predictions'
class BoxPredictor(object):
"""BoxPredictor."""
def __init__(self, is_training, num_classes):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
"""
self._is_training = is_training
self._num_classes = num_classes
@property
def num_classes(self):
return self._num_classes
def predict(self, image_features, num_predictions_per_location,
scope=None, **params):
"""Computes encoded object locations and corresponding confidences.
Takes a list of high level image feature maps as input and produces a list
of box encodings and a list of class scores where each element in the output
lists correspond to the feature maps in the input list.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
num_predictions_per_location: A list of integers representing the number
of box predictions to be made per spatial location for each feature map.
scope: Variable and Op scope name.
**params: Additional keyword arguments for specific implementations of
BoxPredictor.
Returns:
A dictionary containing at least the following tensors.
box_encodings: A list of float tensors. Each entry in the list
corresponds to a feature map in the input `image_features` list. All
tensors in the list have one of the two following shapes:
a. [batch_size, num_anchors_i, q, code_size] representing the location
of the objects, where q is 1 or the number of classes.
b. [batch_size, num_anchors_i, code_size].
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
Raises:
ValueError: If length of `image_features` is not equal to length of
`num_predictions_per_location`.
"""
if len(image_features) != len(num_predictions_per_location):
raise ValueError('image_feature and num_predictions_per_location must '
'be of same length, found: {} vs {}'.
format(len(image_features),
len(num_predictions_per_location)))
if scope is not None:
with tf.variable_scope(scope):
return self._predict(image_features, num_predictions_per_location,
**params)
return self._predict(image_features, num_predictions_per_location,
**params)
# TODO(rathodv): num_predictions_per_location could be moved to constructor.
# This is currently only used by ConvolutionalBoxPredictor.
@abstractmethod
def _predict(self, image_features, num_predictions_per_location, **params):
"""Implementations must override this method.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
num_predictions_per_location: A list of integers representing the number
of box predictions to be made per spatial location for each feature map.
**params: Additional keyword arguments for specific implementations of
BoxPredictor.
Returns:
A dictionary containing at least the following tensors.
box_encodings: A list of float tensors. Each entry in the list
corresponds to a feature map in the input `image_features` list. All
tensors in the list have one of the two following shapes:
a. [batch_size, num_anchors_i, q, code_size] representing the location
of the objects, where q is 1 or the number of classes.
b. [batch_size, num_anchors_i, code_size].
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
"""
pass
class RfcnBoxPredictor(BoxPredictor):
"""RFCN Box Predictor.
Applies a position sensitive ROI pooling on position sensitive feature maps to
predict classes and refined locations. See https://arxiv.org/abs/1605.06409
for details.
This is used for the second stage of the RFCN meta architecture. Notice that
locations are *not* shared across classes, thus for each anchor, a separate
prediction is made for each class.
"""
def __init__(self,
is_training,
num_classes,
conv_hyperparams_fn,
num_spatial_bins,
depth,
crop_size,
box_code_size):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
conv_hyperparams_fn: A function to construct tf-slim arg_scope with
hyperparameters for convolutional layers.
num_spatial_bins: A list of two integers `[spatial_bins_y,
spatial_bins_x]`.
depth: Target depth to reduce the input feature maps to.
crop_size: A list of two integers `[crop_height, crop_width]`.
box_code_size: Size of encoding for each box.
"""
super(RfcnBoxPredictor, self).__init__(is_training, num_classes)
self._conv_hyperparams_fn = conv_hyperparams_fn
self._num_spatial_bins = num_spatial_bins
self._depth = depth
self._crop_size = crop_size
self._box_code_size = box_code_size
@property
def num_classes(self):
return self._num_classes
def _predict(self, image_features, num_predictions_per_location,
proposal_boxes):
"""Computes encoded object locations and corresponding confidences.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
num_predictions_per_location: A list of integers representing the number
of box predictions to be made per spatial location for each feature map.
Currently, this must be set to [1], or an error will be raised.
proposal_boxes: A float tensor of shape [batch_size, num_proposals,
box_code_size].
Returns:
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
Raises:
ValueError: if num_predictions_per_location is not 1 or if
len(image_features) is not 1.
"""
if (len(num_predictions_per_location) != 1 or
num_predictions_per_location[0] != 1):
raise ValueError('Currently RfcnBoxPredictor only supports '
'predicting a single box per class per location.')
if len(image_features) != 1:
raise ValueError('length of `image_features` must be 1. Found {}'.
format(len(image_features)))
image_feature = image_features[0]
num_predictions_per_location = num_predictions_per_location[0]
batch_size = tf.shape(proposal_boxes)[0]
num_boxes = tf.shape(proposal_boxes)[1]
def get_box_indices(proposals):
proposals_shape = proposals.get_shape().as_list()
if any(dim is None for dim in proposals_shape):
proposals_shape = tf.shape(proposals)
ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
multiplier = tf.expand_dims(
tf.range(start=0, limit=proposals_shape[0]), 1)
return tf.reshape(ones_mat * multiplier, [-1])
net = image_feature
with slim.arg_scope(self._conv_hyperparams_fn()):
net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth')
# Location predictions.
location_feature_map_depth = (self._num_spatial_bins[0] *
self._num_spatial_bins[1] *
self.num_classes *
self._box_code_size)
location_feature_map = slim.conv2d(net, location_feature_map_depth,
[1, 1], activation_fn=None,
scope='refined_locations')
box_encodings = ops.position_sensitive_crop_regions(
location_feature_map,
boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
box_ind=get_box_indices(proposal_boxes),
crop_size=self._crop_size,
num_spatial_bins=self._num_spatial_bins,
global_pool=True)
box_encodings = tf.squeeze(box_encodings, squeeze_dims=[1, 2])
box_encodings = tf.reshape(box_encodings,
[batch_size * num_boxes, 1, self.num_classes,
self._box_code_size])
# Class predictions.
total_classes = self.num_classes + 1 # Account for background class.
class_feature_map_depth = (self._num_spatial_bins[0] *
self._num_spatial_bins[1] *
total_classes)
class_feature_map = slim.conv2d(net, class_feature_map_depth, [1, 1],
activation_fn=None,
scope='class_predictions')
class_predictions_with_background = ops.position_sensitive_crop_regions(
class_feature_map,
boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
box_ind=get_box_indices(proposal_boxes),
crop_size=self._crop_size,
num_spatial_bins=self._num_spatial_bins,
global_pool=True)
class_predictions_with_background = tf.squeeze(
class_predictions_with_background, squeeze_dims=[1, 2])
class_predictions_with_background = tf.reshape(
class_predictions_with_background,
[batch_size * num_boxes, 1, total_classes])
return {BOX_ENCODINGS: [box_encodings],
CLASS_PREDICTIONS_WITH_BACKGROUND:
[class_predictions_with_background]}
# TODO(rathodv): Change the implementation to return lists of predictions.
class MaskRCNNBoxPredictor(BoxPredictor):
"""Mask R-CNN Box Predictor.
See Mask R-CNN: He, K., Gkioxari, G., Dollar, P., & Girshick, R. (2017).
Mask R-CNN. arXiv preprint arXiv:1703.06870.
This is used for the second stage of the Mask R-CNN detector where proposals
cropped from an image are arranged along the batch dimension of the input
image_features tensor. Notice that locations are *not* shared across classes,
thus for each anchor, a separate prediction is made for each class.
In addition to predicting boxes and classes, optionally this class allows
predicting masks and/or keypoints inside detection boxes.
Currently this box predictor makes per-class predictions; that is, each
anchor makes a separate box prediction for each class.
"""
def __init__(self,
is_training,
num_classes,
fc_hyperparams_fn,
use_dropout,
dropout_keep_prob,
box_code_size,
conv_hyperparams_fn=None,
predict_instance_masks=False,
mask_height=14,
mask_width=14,
mask_prediction_num_conv_layers=2,
mask_prediction_conv_depth=256,
masks_are_class_agnostic=False,
predict_keypoints=False,
share_box_across_classes=False):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
fc_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for fully connected ops.
use_dropout: Option to use dropout or not. Note that a single dropout
op is applied here prior to both box and class predictions, which stands
in contrast to the ConvolutionalBoxPredictor below.
dropout_keep_prob: Keep probability for dropout.
This is only used if use_dropout is True.
box_code_size: Size of encoding for each box.
conv_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for convolution ops.
predict_instance_masks: Whether to predict object masks inside detection
boxes.
mask_height: Desired output mask height. The default value is 14.
mask_width: Desired output mask width. The default value is 14.
mask_prediction_num_conv_layers: Number of convolution layers applied to
the image_features in mask prediction branch.
mask_prediction_conv_depth: The depth for the first conv2d_transpose op
applied to the image_features in the mask prediction branch. If set
to 0, the depth of the convolution layers will be automatically chosen
based on the number of object classes and the number of channels in the
image features.
masks_are_class_agnostic: Boolean determining if the mask-head is
class-agnostic or not.
predict_keypoints: Whether to predict keypoints insde detection boxes.
share_box_across_classes: Whether to share boxes across classes rather
than use a different box for each class.
Raises:
ValueError: If predict_instance_masks is true but conv_hyperparams is not
set.
ValueError: If predict_keypoints is true since it is not implemented yet.
ValueError: If mask_prediction_num_conv_layers is smaller than two.
"""
super(MaskRCNNBoxPredictor, self).__init__(is_training, num_classes)
self._fc_hyperparams_fn = fc_hyperparams_fn
self._use_dropout = use_dropout
self._box_code_size = box_code_size
self._dropout_keep_prob = dropout_keep_prob
self._conv_hyperparams_fn = conv_hyperparams_fn
self._predict_instance_masks = predict_instance_masks
self._mask_height = mask_height
self._mask_width = mask_width
self._mask_prediction_num_conv_layers = mask_prediction_num_conv_layers
self._mask_prediction_conv_depth = mask_prediction_conv_depth
self._masks_are_class_agnostic = masks_are_class_agnostic
self._predict_keypoints = predict_keypoints
self._share_box_across_classes = share_box_across_classes
if self._predict_keypoints:
raise ValueError('Keypoint prediction is unimplemented.')
if ((self._predict_instance_masks or self._predict_keypoints) and
self._conv_hyperparams_fn is None):
raise ValueError('`conv_hyperparams` must be provided when predicting '
'masks.')
if self._mask_prediction_num_conv_layers < 2:
raise ValueError(
'Mask prediction should consist of at least 2 conv layers')
@property
def num_classes(self):
return self._num_classes
@property
def predicts_instance_masks(self):
return self._predict_instance_masks
def _predict_boxes_and_classes(self, image_features):
"""Predicts boxes and class scores.
Args:
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
Returns:
box_encodings: A float tensor of shape
[batch_size, 1, num_classes, code_size] representing the location of the
objects.
class_predictions_with_background: A float tensor of shape
[batch_size, 1, num_classes + 1] representing the class predictions for
the proposals.
"""
spatial_averaged_image_features = tf.reduce_mean(image_features, [1, 2],
keep_dims=True,
name='AvgPool')
flattened_image_features = slim.flatten(spatial_averaged_image_features)
if self._use_dropout:
flattened_image_features = slim.dropout(flattened_image_features,
keep_prob=self._dropout_keep_prob,
is_training=self._is_training)
number_of_boxes = 1
if not self._share_box_across_classes:
number_of_boxes = self._num_classes
with slim.arg_scope(self._fc_hyperparams_fn()):
box_encodings = slim.fully_connected(
flattened_image_features,
number_of_boxes * self._box_code_size,
activation_fn=None,
scope='BoxEncodingPredictor')
class_predictions_with_background = slim.fully_connected(
flattened_image_features,
self._num_classes + 1,
activation_fn=None,
scope='ClassPredictor')
box_encodings = tf.reshape(
box_encodings, [-1, 1, number_of_boxes, self._box_code_size])
class_predictions_with_background = tf.reshape(
class_predictions_with_background, [-1, 1, self._num_classes + 1])
return box_encodings, class_predictions_with_background
def _get_mask_predictor_conv_depth(self, num_feature_channels, num_classes,
class_weight=3.0, feature_weight=2.0):
"""Computes the depth of the mask predictor convolutions.
Computes the depth of the mask predictor convolutions given feature channels
and number of classes by performing a weighted average of the two in
log space to compute the number of convolution channels. The weights that
are used for computing the weighted average do not need to sum to 1.
Args:
num_feature_channels: An integer containing the number of feature
channels.
num_classes: An integer containing the number of classes.
class_weight: Class weight used in computing the weighted average.
feature_weight: Feature weight used in computing the weighted average.
Returns:
An integer containing the number of convolution channels used by mask
predictor.
"""
num_feature_channels_log = math.log(float(num_feature_channels), 2.0)
num_classes_log = math.log(float(num_classes), 2.0)
weighted_num_feature_channels_log = (
num_feature_channels_log * feature_weight)
weighted_num_classes_log = num_classes_log * class_weight
total_weight = feature_weight + class_weight
num_conv_channels_log = round(
(weighted_num_feature_channels_log + weighted_num_classes_log) /
total_weight)
return int(math.pow(2.0, num_conv_channels_log))
def _predict_masks(self, image_features):
"""Performs mask prediction.
Args:
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
Returns:
instance_masks: A float tensor of shape
[batch_size, 1, num_classes, image_height, image_width].
"""
num_conv_channels = self._mask_prediction_conv_depth
if num_conv_channels == 0:
num_feature_channels = image_features.get_shape().as_list()[3]
num_conv_channels = self._get_mask_predictor_conv_depth(
num_feature_channels, self.num_classes)
with slim.arg_scope(self._conv_hyperparams_fn()):
upsampled_features = tf.image.resize_bilinear(
image_features,
[self._mask_height, self._mask_width],
align_corners=True)
for _ in range(self._mask_prediction_num_conv_layers - 1):
upsampled_features = slim.conv2d(
upsampled_features,
num_outputs=num_conv_channels,
kernel_size=[3, 3])
num_masks = 1 if self._masks_are_class_agnostic else self.num_classes
mask_predictions = slim.conv2d(upsampled_features,
num_outputs=num_masks,
activation_fn=None,
kernel_size=[3, 3])
return tf.expand_dims(
tf.transpose(mask_predictions, perm=[0, 3, 1, 2]),
axis=1,
name='MaskPredictor')
def _predict(self, image_features, num_predictions_per_location,
predict_boxes_and_classes=True, predict_auxiliary_outputs=False):
"""Optionally computes encoded object locations, confidences, and masks.
Flattens image_features and applies fully connected ops (with no
non-linearity) to predict box encodings and class predictions. In this
setting, anchors are not spatially arranged in any way and are assumed to
have been folded into the batch dimension. Thus we output 1 for the
anchors dimension.
Also optionally predicts instance masks.
The mask prediction head is based on the Mask RCNN paper with the following
modifications: We replace the deconvolution layer with a bilinear resize
and a convolution.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
num_predictions_per_location: A list of integers representing the number
of box predictions to be made per spatial location for each feature map.
Currently, this must be set to [1], or an error will be raised.
predict_boxes_and_classes: If true, the function will perform box
refinement and classification.
predict_auxiliary_outputs: If true, the function will perform other
predictions such as mask, keypoint, boundaries, etc. if any.
Returns:
A dictionary containing the following tensors.
box_encodings: A float tensor of shape
[batch_size, 1, num_classes, code_size] representing the
location of the objects.
class_predictions_with_background: A float tensor of shape
[batch_size, 1, num_classes + 1] representing the class
predictions for the proposals.
If predict_masks is True the dictionary also contains:
instance_masks: A float tensor of shape
[batch_size, 1, num_classes, image_height, image_width]
If predict_keypoints is True the dictionary also contains:
keypoints: [batch_size, 1, num_keypoints, 2]
Raises:
ValueError: If num_predictions_per_location is not 1 or if both
predict_boxes_and_classes and predict_auxiliary_outputs are false or if
len(image_features) is not 1.
"""
if (len(num_predictions_per_location) != 1 or
num_predictions_per_location[0] != 1):
raise ValueError('Currently FullyConnectedBoxPredictor only supports '
'predicting a single box per class per location.')
if not predict_boxes_and_classes and not predict_auxiliary_outputs:
raise ValueError('Should perform at least one prediction.')
if len(image_features) != 1:
raise ValueError('length of `image_features` must be 1. Found {}'.
format(len(image_features)))
image_feature = image_features[0]
num_predictions_per_location = num_predictions_per_location[0]
predictions_dict = {}
if predict_boxes_and_classes:
(box_encodings, class_predictions_with_background
) = self._predict_boxes_and_classes(image_feature)
predictions_dict[BOX_ENCODINGS] = box_encodings
predictions_dict[
CLASS_PREDICTIONS_WITH_BACKGROUND] = class_predictions_with_background
if self._predict_instance_masks and predict_auxiliary_outputs:
predictions_dict[MASK_PREDICTIONS] = self._predict_masks(image_feature)
return predictions_dict
class _NoopVariableScope(object):
"""A dummy class that does not push any scope."""
def __enter__(self):
return None
def __exit__(self, exc_type, exc_value, traceback):
return False
class ConvolutionalBoxPredictor(BoxPredictor):
"""Convolutional Box Predictor.
Optionally add an intermediate 1x1 convolutional layer after features and
predict in parallel branches box_encodings and
class_predictions_with_background.
Currently this box predictor assumes that predictions are "shared" across
classes --- that is each anchor makes box predictions which do not depend
on class.
"""
def __init__(self,
is_training,
num_classes,
conv_hyperparams_fn,
min_depth,
max_depth,
num_layers_before_predictor,
use_dropout,
dropout_keep_prob,
kernel_size,
box_code_size,
apply_sigmoid_to_scores=False,
class_prediction_bias_init=0.0,
use_depthwise=False):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
conv_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for convolution ops.
min_depth: Minimum feature depth prior to predicting box encodings
and class predictions.
max_depth: Maximum feature depth prior to predicting box encodings
and class predictions. If max_depth is set to 0, no additional
feature map will be inserted before location and class predictions.
num_layers_before_predictor: Number of the additional conv layers before
the predictor.
use_dropout: Option to use dropout for class prediction or not.
dropout_keep_prob: Keep probability for dropout.
This is only used if use_dropout is True.
kernel_size: Size of final convolution kernel. If the
spatial resolution of the feature map is smaller than the kernel size,
then the kernel size is automatically set to be
min(feature_width, feature_height).
box_code_size: Size of encoding for each box.
apply_sigmoid_to_scores: if True, apply the sigmoid on the output
class_predictions.
class_prediction_bias_init: constant value to initialize bias of the last
conv2d layer before class prediction.
use_depthwise: Whether to use depthwise convolutions for prediction
steps. Default is False.
Raises:
ValueError: if min_depth > max_depth.
"""
super(ConvolutionalBoxPredictor, self).__init__(is_training, num_classes)
if min_depth > max_depth:
raise ValueError('min_depth should be less than or equal to max_depth')
self._conv_hyperparams_fn = conv_hyperparams_fn
self._min_depth = min_depth
self._max_depth = max_depth
self._num_layers_before_predictor = num_layers_before_predictor
self._use_dropout = use_dropout
self._kernel_size = kernel_size
self._box_code_size = box_code_size
self._dropout_keep_prob = dropout_keep_prob
self._apply_sigmoid_to_scores = apply_sigmoid_to_scores
self._class_prediction_bias_init = class_prediction_bias_init
self._use_depthwise = use_depthwise
def _predict(self, image_features, num_predictions_per_location_list):
"""Computes encoded object locations and corresponding confidences.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
num_predictions_per_location_list: A list of integers representing the
number of box predictions to be made per spatial location for each
feature map.
Returns:
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
"""
box_encodings_list = []
class_predictions_list = []
# TODO(rathodv): Come up with a better way to generate scope names
# in box predictor once we have time to retrain all models in the zoo.
# The following lines create scope names to be backwards compatible with the
# existing checkpoints.
box_predictor_scopes = [_NoopVariableScope()]
if len(image_features) > 1:
box_predictor_scopes = [
tf.variable_scope('BoxPredictor_{}'.format(i))
for i in range(len(image_features))
]
for (image_feature,
num_predictions_per_location, box_predictor_scope) in zip(
image_features, num_predictions_per_location_list,
box_predictor_scopes):
with box_predictor_scope:
# Add a slot for the background class.
num_class_slots = self.num_classes + 1
net = image_feature
with slim.arg_scope(self._conv_hyperparams_fn()), \
slim.arg_scope([slim.dropout], is_training=self._is_training):
# Add additional conv layers before the class predictor.
features_depth = static_shape.get_depth(image_feature.get_shape())
depth = max(min(features_depth, self._max_depth), self._min_depth)
tf.logging.info('depth of additional conv before box predictor: {}'.
format(depth))
if depth > 0 and self._num_layers_before_predictor > 0:
for i in range(self._num_layers_before_predictor):
net = slim.conv2d(
net, depth, [1, 1], scope='Conv2d_%d_1x1_%d' % (i, depth))
with slim.arg_scope([slim.conv2d], activation_fn=None,
normalizer_fn=None, normalizer_params=None):
if self._use_depthwise:
box_encodings = slim.separable_conv2d(
net, None, [self._kernel_size, self._kernel_size],
padding='SAME', depth_multiplier=1, stride=1,
rate=1, scope='BoxEncodingPredictor_depthwise')
box_encodings = slim.conv2d(
box_encodings,
num_predictions_per_location * self._box_code_size, [1, 1],
scope='BoxEncodingPredictor')
else:
box_encodings = slim.conv2d(
net, num_predictions_per_location * self._box_code_size,
[self._kernel_size, self._kernel_size],
scope='BoxEncodingPredictor')
if self._use_dropout:
net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
if self._use_depthwise:
class_predictions_with_background = slim.separable_conv2d(
net, None, [self._kernel_size, self._kernel_size],
padding='SAME', depth_multiplier=1, stride=1,
rate=1, scope='ClassPredictor_depthwise')
class_predictions_with_background = slim.conv2d(
class_predictions_with_background,
num_predictions_per_location * num_class_slots,
[1, 1], scope='ClassPredictor')
else:
class_predictions_with_background = slim.conv2d(
net, num_predictions_per_location * num_class_slots,
[self._kernel_size, self._kernel_size],
scope='ClassPredictor',
biases_initializer=tf.constant_initializer(
self._class_prediction_bias_init))
if self._apply_sigmoid_to_scores:
class_predictions_with_background = tf.sigmoid(
class_predictions_with_background)
combined_feature_map_shape = (shape_utils.
combined_static_and_dynamic_shape(
image_feature))
box_encodings = tf.reshape(
box_encodings, tf.stack([combined_feature_map_shape[0],
combined_feature_map_shape[1] *
combined_feature_map_shape[2] *
num_predictions_per_location,
1, self._box_code_size]))
box_encodings_list.append(box_encodings)
class_predictions_with_background = tf.reshape(
class_predictions_with_background,
tf.stack([combined_feature_map_shape[0],
combined_feature_map_shape[1] *
combined_feature_map_shape[2] *
num_predictions_per_location,
num_class_slots]))
class_predictions_list.append(class_predictions_with_background)
return {
BOX_ENCODINGS: box_encodings_list,
CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
}
# TODO(rathodv): Replace with slim.arg_scope_func_key once its available
# externally.
def _arg_scope_func_key(op):
"""Returns a key that can be used to index arg_scope dictionary."""
return getattr(op, '_key_op', str(op))
# TODO(rathodv): Merge the implementation with ConvolutionalBoxPredictor above
# since they are very similar.
class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
"""Convolutional Box Predictor with weight sharing.
Defines the box predictor as defined in
https://arxiv.org/abs/1708.02002. This class differs from
ConvolutionalBoxPredictor in that it shares weights and biases while
predicting from different feature maps. However, batch_norm parameters are not
shared because the statistics of the activations vary among the different
feature maps.
Also note that separate multi-layer towers are constructed for the box
encoding and class predictors respectively.
"""
def __init__(self,
is_training,
num_classes,
conv_hyperparams_fn,
depth,
num_layers_before_predictor,
box_code_size,
kernel_size=3,
class_prediction_bias_init=0.0,
use_dropout=False,
dropout_keep_prob=0.8):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
conv_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for convolution ops.
depth: depth of conv layers.
num_layers_before_predictor: Number of the additional conv layers before
the predictor.
box_code_size: Size of encoding for each box.
kernel_size: Size of final convolution kernel.
class_prediction_bias_init: constant value to initialize bias of the last
conv2d layer before class prediction.
use_dropout: Whether to apply dropout to class prediction head.
dropout_keep_prob: Probability of keeping activiations.
"""
super(WeightSharedConvolutionalBoxPredictor, self).__init__(is_training,
num_classes)
self._conv_hyperparams_fn = conv_hyperparams_fn
self._depth = depth
self._num_layers_before_predictor = num_layers_before_predictor
self._box_code_size = box_code_size
self._kernel_size = kernel_size
self._class_prediction_bias_init = class_prediction_bias_init
self._use_dropout = use_dropout
self._dropout_keep_prob = dropout_keep_prob
def _predict(self, image_features, num_predictions_per_location_list):
"""Computes encoded object locations and corresponding confidences.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels] containing features for a batch of images. Note that
all tensors in the list must have the same number of channels.
num_predictions_per_location_list: A list of integers representing the
number of box predictions to be made per spatial location for each
feature map. Note that all values must be the same since the weights are
shared.
Returns:
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, code_size] representing the location of
the objects. Each entry in the list corresponds to a feature map in the
input `image_features` list.
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
Raises:
ValueError: If the image feature maps do not have the same number of
channels or if the num predictions per locations is differs between the
feature maps.
"""
if len(set(num_predictions_per_location_list)) > 1:
raise ValueError('num predictions per location must be same for all'
'feature maps, found: {}'.format(
num_predictions_per_location_list))
feature_channels = [
image_feature.shape[3].value for image_feature in image_features
]
if len(set(feature_channels)) > 1:
raise ValueError('all feature maps must have the same number of '
'channels, found: {}'.format(feature_channels))
box_encodings_list = []
class_predictions_list = []
for feature_index, (image_feature,
num_predictions_per_location) in enumerate(
zip(image_features,
num_predictions_per_location_list)):
# Add a slot for the background class.
with tf.variable_scope('WeightSharedConvolutionalBoxPredictor',
reuse=tf.AUTO_REUSE):
num_class_slots = self.num_classes + 1
box_encodings_net = image_feature
class_predictions_net = image_feature
with slim.arg_scope(self._conv_hyperparams_fn()) as sc:
apply_batch_norm = _arg_scope_func_key(slim.batch_norm) in sc
for i in range(self._num_layers_before_predictor):
box_encodings_net = slim.conv2d(
box_encodings_net,
self._depth,
[self._kernel_size, self._kernel_size],
stride=1,
padding='SAME',
activation_fn=None,
normalizer_fn=(tf.identity if apply_batch_norm else None),
scope='BoxPredictionTower/conv2d_{}'.format(i))
if apply_batch_norm:
box_encodings_net = slim.batch_norm(
box_encodings_net,
scope='BoxPredictionTower/conv2d_{}/BatchNorm/feature_{}'.
format(i, feature_index))
box_encodings_net = tf.nn.relu6(box_encodings_net)
box_encodings = slim.conv2d(
box_encodings_net,
num_predictions_per_location * self._box_code_size,
[self._kernel_size, self._kernel_size],
activation_fn=None, stride=1, padding='SAME',
normalizer_fn=None,
scope='BoxPredictor')
for i in range(self._num_layers_before_predictor):
class_predictions_net = slim.conv2d(
class_predictions_net,
self._depth,
[self._kernel_size, self._kernel_size],
stride=1,
padding='SAME',
activation_fn=None,
normalizer_fn=(tf.identity if apply_batch_norm else None),
scope='ClassPredictionTower/conv2d_{}'.format(i))
if apply_batch_norm:
class_predictions_net = slim.batch_norm(
class_predictions_net,
scope='ClassPredictionTower/conv2d_{}/BatchNorm/feature_{}'
.format(i, feature_index))
class_predictions_net = tf.nn.relu6(class_predictions_net)
if self._use_dropout:
class_predictions_net = slim.dropout(
class_predictions_net, keep_prob=self._dropout_keep_prob)
class_predictions_with_background = slim.conv2d(
class_predictions_net,
num_predictions_per_location * num_class_slots,
[self._kernel_size, self._kernel_size],
activation_fn=None, stride=1, padding='SAME',
normalizer_fn=None,
biases_initializer=tf.constant_initializer(
self._class_prediction_bias_init),
scope='ClassPredictor')
combined_feature_map_shape = (shape_utils.
combined_static_and_dynamic_shape(
image_feature))
box_encodings = tf.reshape(
box_encodings, tf.stack([combined_feature_map_shape[0],
combined_feature_map_shape[1] *
combined_feature_map_shape[2] *
num_predictions_per_location,
self._box_code_size]))
box_encodings_list.append(box_encodings)
class_predictions_with_background = tf.reshape(
class_predictions_with_background,
tf.stack([combined_feature_map_shape[0],
combined_feature_map_shape[1] *
combined_feature_map_shape[2] *
num_predictions_per_location,
num_class_slots]))
class_predictions_list.append(class_predictions_with_background)
return {
BOX_ENCODINGS: box_encodings_list,
CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
}
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_predictor."""
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.core import box_predictor
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
class MaskRCNNBoxPredictorTest(tf.test.TestCase):
def _build_arg_scope_with_hyperparams(self,
op_type=hyperparams_pb2.Hyperparams.FC):
hyperparams = hyperparams_pb2.Hyperparams()
hyperparams_text_proto = """
activation: NONE
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
text_format.Merge(hyperparams_text_proto, hyperparams)
hyperparams.op = op_type
return hyperparams_builder.build(hyperparams, is_training=True)
def test_get_boxes_with_five_classes(self):
image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
)
box_predictions = mask_box_predictor.predict(
[image_features], num_predictions_per_location=[1],
scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
class_predictions_with_background = box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
class_predictions_with_background_shape) = sess.run(
[tf.shape(box_encodings),
tf.shape(class_predictions_with_background)])
self.assertAllEqual(box_encodings_shape, [2, 1, 5, 4])
self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6])
def test_get_boxes_with_five_classes_share_box_across_classes(self):
image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
share_box_across_classes=True
)
box_predictions = mask_box_predictor.predict(
[image_features], num_predictions_per_location=[1],
scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
class_predictions_with_background = box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
class_predictions_with_background_shape) = sess.run(
[tf.shape(box_encodings),
tf.shape(class_predictions_with_background)])
self.assertAllEqual(box_encodings_shape, [2, 1, 1, 4])
self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6])
def test_value_error_on_predict_instance_masks_with_no_conv_hyperparms(self):
with self.assertRaises(ValueError):
box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
predict_instance_masks=True)
def test_get_instance_masks(self):
image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
conv_hyperparams_fn=self._build_arg_scope_with_hyperparams(
op_type=hyperparams_pb2.Hyperparams.CONV),
predict_instance_masks=True)
box_predictions = mask_box_predictor.predict(
[image_features],
num_predictions_per_location=[1],
scope='BoxPredictor',
predict_boxes_and_classes=True,
predict_auxiliary_outputs=True)
mask_predictions = box_predictions[box_predictor.MASK_PREDICTIONS]
self.assertListEqual([2, 1, 5, 14, 14],
mask_predictions.get_shape().as_list())
def test_do_not_return_instance_masks_without_request(self):
image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4)
box_predictions = mask_box_predictor.predict(
[image_features], num_predictions_per_location=[1],
scope='BoxPredictor')
self.assertEqual(len(box_predictions), 2)
self.assertTrue(box_predictor.BOX_ENCODINGS in box_predictions)
self.assertTrue(box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND
in box_predictions)
def test_value_error_on_predict_keypoints(self):
with self.assertRaises(ValueError):
box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
predict_keypoints=True)
class RfcnBoxPredictorTest(tf.test.TestCase):
def _build_arg_scope_with_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.build(conv_hyperparams, is_training=True)
def test_get_correct_box_encoding_and_class_prediction_shapes(self):
image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
proposal_boxes = tf.random_normal([4, 2, 4], dtype=tf.float32)
rfcn_box_predictor = box_predictor.RfcnBoxPredictor(
is_training=False,
num_classes=2,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
num_spatial_bins=[3, 3],
depth=4,
crop_size=[12, 12],
box_code_size=4
)
box_predictions = rfcn_box_predictor.predict(
[image_features], num_predictions_per_location=[1],
scope='BoxPredictor',
proposal_boxes=proposal_boxes)
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
class_predictions_shape) = sess.run(
[tf.shape(box_encodings),
tf.shape(class_predictions_with_background)])
self.assertAllEqual(box_encodings_shape, [8, 1, 2, 4])
self.assertAllEqual(class_predictions_shape, [8, 1, 3])
class ConvolutionalBoxPredictorTest(test_case.TestCase):
def _build_arg_scope_with_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.build(conv_hyperparams, is_training=True)
def test_get_boxes_for_five_aspect_ratios_per_location(self):
def graph_fn(image_features):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4
)
box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
objectness_predictions = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, objectness_predictions)
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, objectness_predictions) = self.execute(graph_fn,
[image_features])
self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
self.assertAllEqual(objectness_predictions.shape, [4, 320, 1])
def test_get_boxes_for_one_aspect_ratio_per_location(self):
def graph_fn(image_features):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4
)
box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[1],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
objectness_predictions = tf.concat(box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
return (box_encodings, objectness_predictions)
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, objectness_predictions) = self.execute(graph_fn,
[image_features])
self.assertAllEqual(box_encodings.shape, [4, 64, 1, 4])
self.assertAllEqual(objectness_predictions.shape, [4, 64, 1])
def test_get_multi_class_predictions_for_five_aspect_ratios_per_location(
self):
num_classes_without_background = 6
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
def graph_fn(image_features):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4
)
box_predictions = conv_box_predictor.predict(
[image_features],
num_predictions_per_location=[5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, class_predictions_with_background)
(box_encodings,
class_predictions_with_background) = self.execute(graph_fn,
[image_features])
self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
self.assertAllEqual(class_predictions_with_background.shape,
[4, 320, num_classes_without_background+1])
def test_get_predictions_with_feature_maps_of_dynamic_shape(
self):
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4
)
box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
objectness_predictions = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
init_op = tf.global_variables_initializer()
resolution = 32
expected_num_anchors = resolution*resolution*5
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
objectness_predictions_shape) = sess.run(
[tf.shape(box_encodings), tf.shape(objectness_predictions)],
feed_dict={image_features:
np.random.rand(4, resolution, resolution, 64)})
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
expected_variable_set = set([
'BoxPredictor/Conv2d_0_1x1_32/biases',
'BoxPredictor/Conv2d_0_1x1_32/weights',
'BoxPredictor/BoxEncodingPredictor/biases',
'BoxPredictor/BoxEncodingPredictor/weights',
'BoxPredictor/ClassPredictor/biases',
'BoxPredictor/ClassPredictor/weights'])
self.assertEqual(expected_variable_set, actual_variable_set)
def test_use_depthwise_convolution(self):
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4,
use_dropout=True,
use_depthwise=True
)
box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
objectness_predictions = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
init_op = tf.global_variables_initializer()
resolution = 32
expected_num_anchors = resolution*resolution*5
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
objectness_predictions_shape) = sess.run(
[tf.shape(box_encodings), tf.shape(objectness_predictions)],
feed_dict={image_features:
np.random.rand(4, resolution, resolution, 64)})
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
expected_variable_set = set([
'BoxPredictor/Conv2d_0_1x1_32/biases',
'BoxPredictor/Conv2d_0_1x1_32/weights',
'BoxPredictor/BoxEncodingPredictor_depthwise/biases',
'BoxPredictor/BoxEncodingPredictor_depthwise/depthwise_weights',
'BoxPredictor/BoxEncodingPredictor/biases',
'BoxPredictor/BoxEncodingPredictor/weights',
'BoxPredictor/ClassPredictor_depthwise/biases',
'BoxPredictor/ClassPredictor_depthwise/depthwise_weights',
'BoxPredictor/ClassPredictor/biases',
'BoxPredictor/ClassPredictor/weights'])
self.assertEqual(expected_variable_set, actual_variable_set)
class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
def _build_arg_scope_with_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
train: true,
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.build(conv_hyperparams, is_training=True)
def _build_conv_arg_scope_no_batch_norm(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.build(conv_hyperparams, is_training=True)
def test_get_boxes_for_five_aspect_ratios_per_location(self):
def graph_fn(image_features):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
objectness_predictions = tf.concat(box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
return (box_encodings, objectness_predictions)
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, objectness_predictions) = self.execute(
graph_fn, [image_features])
self.assertAllEqual(box_encodings.shape, [4, 320, 4])
self.assertAllEqual(objectness_predictions.shape, [4, 320, 1])
def test_bias_predictions_to_background_with_sigmoid_score_conversion(self):
def graph_fn(image_features):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=True,
num_classes=2,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
class_prediction_bias_init=-4.6,
box_code_size=4)
box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[5],
scope='BoxPredictor')
class_predictions = tf.concat(box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
return (tf.nn.sigmoid(class_predictions),)
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
class_predictions = self.execute(graph_fn, [image_features])
self.assertAlmostEqual(np.mean(class_predictions), 0.01, places=3)
def test_get_multi_class_predictions_for_five_aspect_ratios_per_location(
self):
num_classes_without_background = 6
def graph_fn(image_features):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
box_predictions = conv_box_predictor.predict(
[image_features],
num_predictions_per_location=[5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
return (box_encodings, class_predictions_with_background)
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, class_predictions_with_background) = self.execute(
graph_fn, [image_features])
self.assertAllEqual(box_encodings.shape, [4, 320, 4])
self.assertAllEqual(class_predictions_with_background.shape,
[4, 320, num_classes_without_background+1])
def test_get_multi_class_predictions_from_two_feature_maps(
self):
num_classes_without_background = 6
def graph_fn(image_features1, image_features2):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
box_predictions = conv_box_predictor.predict(
[image_features1, image_features2],
num_predictions_per_location=[5, 5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, class_predictions_with_background)
image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32)
image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, class_predictions_with_background) = self.execute(
graph_fn, [image_features1, image_features2])
self.assertAllEqual(box_encodings.shape, [4, 640, 4])
self.assertAllEqual(class_predictions_with_background.shape,
[4, 640, num_classes_without_background+1])
def test_predictions_from_multiple_feature_maps_share_weights_not_batchnorm(
self):
num_classes_without_background = 6
def graph_fn(image_features1, image_features2):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=2,
box_code_size=4)
box_predictions = conv_box_predictor.predict(
[image_features1, image_features2],
num_predictions_per_location=[5, 5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, class_predictions_with_background)
with self.test_session(graph=tf.Graph()):
graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
expected_variable_set = set([
# Box prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
# Box prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/biases'),
# Class prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
# Class prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/biases')])
self.assertEqual(expected_variable_set, actual_variable_set)
def test_no_batchnorm_params_when_batchnorm_is_not_configured(self):
num_classes_without_background = 6
def graph_fn(image_features1, image_features2):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams_fn=self._build_conv_arg_scope_no_batch_norm(),
depth=32,
num_layers_before_predictor=2,
box_code_size=4)
box_predictions = conv_box_predictor.predict(
[image_features1, image_features2],
num_predictions_per_location=[5, 5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, class_predictions_with_background)
with self.test_session(graph=tf.Graph()):
graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
expected_variable_set = set([
# Box prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/biases'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/biases'),
# Box prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/biases'),
# Class prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/biases'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/biases'),
# Class prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/biases')])
self.assertEqual(expected_variable_set, actual_variable_set)
def test_get_predictions_with_feature_maps_of_dynamic_shape(
self):
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[5],
scope='BoxPredictor')
box_encodings = tf.concat(box_predictions[box_predictor.BOX_ENCODINGS],
axis=1)
objectness_predictions = tf.concat(box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
init_op = tf.global_variables_initializer()
resolution = 32
expected_num_anchors = resolution*resolution*5
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
objectness_predictions_shape) = sess.run(
[tf.shape(box_encodings), tf.shape(objectness_predictions)],
feed_dict={image_features:
np.random.rand(4, resolution, resolution, 64)})
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4])
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Interface for data decoders.
Data decoders decode the input data and return a dictionary of tensors keyed by
the entries in core.reader.Fields.
"""
from abc import ABCMeta
from abc import abstractmethod
class DataDecoder(object):
"""Interface for data decoders."""
__metaclass__ = ABCMeta
@abstractmethod
def decode(self, data):
"""Return a single image and associated labels.
Args:
data: a string tensor holding a serialized protocol buffer corresponding
to data for a single image.
Returns:
tensor_dict: a dictionary containing tensors. Possible keys are defined in
reader.Fields.
"""
pass
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Interface for data parsers.
Data parser parses input data and returns a dictionary of numpy arrays
keyed by the entries in standard_fields.py. Since the parser parses records
to numpy arrays (materialized tensors) directly, it is used to read data for
evaluation/visualization; to parse the data during training, DataDecoder should
be used.
"""
from abc import ABCMeta
from abc import abstractmethod
class DataToNumpyParser(object):
__metaclass__ = ABCMeta
@abstractmethod
def parse(self, input_data):
"""Parses input and returns a numpy array or a dictionary of numpy arrays.
Args:
input_data: an input data
Returns:
A numpy array or a dictionary of numpy arrays or None, if input
cannot be parsed.
"""
pass
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keypoint operations.
Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2],
where the last dimension holds rank 2 tensors of the form [y, x] representing
the coordinates of the keypoint.
"""
import numpy as np
import tensorflow as tf
def scale(keypoints, y_scale, x_scale, scope=None):
"""Scales keypoint coordinates in x and y dimensions.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
y_scale: (float) scalar tensor
x_scale: (float) scalar tensor
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'Scale'):
y_scale = tf.cast(y_scale, tf.float32)
x_scale = tf.cast(x_scale, tf.float32)
new_keypoints = keypoints * [[[y_scale, x_scale]]]
return new_keypoints
def clip_to_window(keypoints, window, scope=None):
"""Clips keypoints to a window.
This op clips any input keypoints to a window.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
window to which the op should clip the keypoints.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'ClipToWindow'):
y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
y = tf.maximum(tf.minimum(y, win_y_max), win_y_min)
x = tf.maximum(tf.minimum(x, win_x_max), win_x_min)
new_keypoints = tf.concat([y, x], 2)
return new_keypoints
def prune_outside_window(keypoints, window, scope=None):
"""Prunes keypoints that fall outside a given window.
This function replaces keypoints that fall outside the given window with nan.
See also clip_to_window which clips any keypoints that fall outside the given
window.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
window outside of which the op should prune the keypoints.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'PruneOutsideWindow'):
y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
valid_indices = tf.logical_and(
tf.logical_and(y >= win_y_min, y <= win_y_max),
tf.logical_and(x >= win_x_min, x <= win_x_max))
new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y))
new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x))
new_keypoints = tf.concat([new_y, new_x], 2)
return new_keypoints
def change_coordinate_frame(keypoints, window, scope=None):
"""Changes coordinate frame of the keypoints to be relative to window's frame.
Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint
coordinates from keypoints of shape [num_instances, num_keypoints, 2]
to be relative to this window.
An example use case is data augmentation: where we are given groundtruth
keypoints and would like to randomly crop the image to some window. In this
case we need to change the coordinate frame of each groundtruth keypoint to be
relative to this new window.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
window we should change the coordinate frame to.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'ChangeCoordinateFrame'):
win_height = window[2] - window[0]
win_width = window[3] - window[1]
new_keypoints = scale(keypoints - [window[0], window[1]], 1.0 / win_height,
1.0 / win_width)
return new_keypoints
def to_normalized_coordinates(keypoints, height, width,
check_range=True, scope=None):
"""Converts absolute keypoint coordinates to normalized coordinates in [0, 1].
Usually one uses the dynamic shape of the image or conv-layer tensor:
keypoints = keypoint_ops.to_normalized_coordinates(keypoints,
tf.shape(images)[1],
tf.shape(images)[2]),
This function raises an assertion failed error at graph execution time when
the maximum coordinate is smaller than 1.01 (which means that coordinates are
already normalized). The value 1.01 is to deal with small rounding errors.
Args:
keypoints: A tensor of shape [num_instances, num_keypoints, 2].
height: Maximum value for y coordinate of absolute keypoint coordinates.
width: Maximum value for x coordinate of absolute keypoint coordinates.
check_range: If True, checks if the coordinates are normalized.
scope: name scope.
Returns:
tensor of shape [num_instances, num_keypoints, 2] with normalized
coordinates in [0, 1].
"""
with tf.name_scope(scope, 'ToNormalizedCoordinates'):
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
if check_range:
max_val = tf.reduce_max(keypoints)
max_assert = tf.Assert(tf.greater(max_val, 1.01),
['max value is lower than 1.01: ', max_val])
with tf.control_dependencies([max_assert]):
width = tf.identity(width)
return scale(keypoints, 1.0 / height, 1.0 / width)
def to_absolute_coordinates(keypoints, height, width,
check_range=True, scope=None):
"""Converts normalized keypoint coordinates to absolute pixel coordinates.
This function raises an assertion failed error when the maximum keypoint
coordinate value is larger than 1.01 (in which case coordinates are already
absolute).
Args:
keypoints: A tensor of shape [num_instances, num_keypoints, 2]
height: Maximum value for y coordinate of absolute keypoint coordinates.
width: Maximum value for x coordinate of absolute keypoint coordinates.
check_range: If True, checks if the coordinates are normalized or not.
scope: name scope.
Returns:
tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates
in terms of the image size.
"""
with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
# Ensure range of input keypoints is correct.
if check_range:
max_val = tf.reduce_max(keypoints)
max_assert = tf.Assert(tf.greater_equal(1.01, max_val),
['maximum keypoint coordinate value is larger '
'than 1.01: ', max_val])
with tf.control_dependencies([max_assert]):
width = tf.identity(width)
return scale(keypoints, height, width)
def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
"""Flips the keypoints horizontally around the flip_point.
This operation flips the x coordinate for each keypoint around the flip_point
and also permutes the keypoints in a manner specified by flip_permutation.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
flip_point: (float) scalar tensor representing the x coordinate to flip the
keypoints around.
flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. This specifies the mapping from original keypoint indices
to the flipped keypoint indices. This is used primarily for keypoints
that are not reflection invariant. E.g. Suppose there are 3 keypoints
representing ['head', 'right_eye', 'left_eye'], then a logical choice for
flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
and 'right_eye' after a horizontal flip.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'FlipHorizontal'):
keypoints = tf.transpose(keypoints, [1, 0, 2])
keypoints = tf.gather(keypoints, flip_permutation)
v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
u = flip_point * 2.0 - u
new_keypoints = tf.concat([v, u], 2)
new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
return new_keypoints
def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
"""Flips the keypoints vertically around the flip_point.
This operation flips the y coordinate for each keypoint around the flip_point
and also permutes the keypoints in a manner specified by flip_permutation.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
flip_point: (float) scalar tensor representing the y coordinate to flip the
keypoints around.
flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. This specifies the mapping from original keypoint indices
to the flipped keypoint indices. This is used primarily for keypoints
that are not reflection invariant. E.g. Suppose there are 3 keypoints
representing ['head', 'right_eye', 'left_eye'], then a logical choice for
flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
and 'right_eye' after a horizontal flip.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'FlipVertical'):
keypoints = tf.transpose(keypoints, [1, 0, 2])
keypoints = tf.gather(keypoints, flip_permutation)
v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
v = flip_point * 2.0 - v
new_keypoints = tf.concat([v, u], 2)
new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
return new_keypoints
def rot90(keypoints, scope=None):
"""Rotates the keypoints counter-clockwise by 90 degrees.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'Rot90'):
keypoints = tf.transpose(keypoints, [1, 0, 2])
v, u = tf.split(value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2)
v = 1.0 - v
new_keypoints = tf.concat([v, u], 2)
new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
return new_keypoints
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.keypoint_ops."""
import numpy as np
import tensorflow as tf
from object_detection.core import keypoint_ops
class KeypointOpsTest(tf.test.TestCase):
"""Tests for common keypoint operations."""
def test_scale(self):
keypoints = tf.constant([
[[0.0, 0.0], [100.0, 200.0]],
[[50.0, 120.0], [100.0, 140.0]]
])
y_scale = tf.constant(1.0 / 100)
x_scale = tf.constant(1.0 / 200)
expected_keypoints = tf.constant([
[[0., 0.], [1.0, 1.0]],
[[0.5, 0.6], [1.0, 0.7]]
])
output = keypoint_ops.scale(keypoints, y_scale, x_scale)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_clip_to_window(self):
keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
window = tf.constant([0.25, 0.25, 0.75, 0.75])
expected_keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.25], [0.75, 0.75]]
])
output = keypoint_ops.clip_to_window(keypoints, window)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_prune_outside_window(self):
keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
window = tf.constant([0.25, 0.25, 0.75, 0.75])
expected_keypoints = tf.constant([[[0.25, 0.5], [0.75, 0.75]],
[[np.nan, np.nan], [np.nan, np.nan]]])
output = keypoint_ops.prune_outside_window(keypoints, window)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_change_coordinate_frame(self):
keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
window = tf.constant([0.25, 0.25, 0.75, 0.75])
expected_keypoints = tf.constant([
[[0, 0.5], [1.0, 1.0]],
[[0.5, -0.5], [1.5, 1.5]]
])
output = keypoint_ops.change_coordinate_frame(keypoints, window)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_to_normalized_coordinates(self):
keypoints = tf.constant([
[[10., 30.], [30., 45.]],
[[20., 0.], [40., 60.]]
])
output = keypoint_ops.to_normalized_coordinates(
keypoints, 40, 60)
expected_keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_to_normalized_coordinates_already_normalized(self):
keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
output = keypoint_ops.to_normalized_coordinates(
keypoints, 40, 60)
with self.test_session() as sess:
with self.assertRaisesOpError('assertion failed'):
sess.run(output)
def test_to_absolute_coordinates(self):
keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
output = keypoint_ops.to_absolute_coordinates(
keypoints, 40, 60)
expected_keypoints = tf.constant([
[[10., 30.], [30., 45.]],
[[20., 0.], [40., 60.]]
])
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_to_absolute_coordinates_already_absolute(self):
keypoints = tf.constant([
[[10., 30.], [30., 45.]],
[[20., 0.], [40., 60.]]
])
output = keypoint_ops.to_absolute_coordinates(
keypoints, 40, 60)
with self.test_session() as sess:
with self.assertRaisesOpError('assertion failed'):
sess.run(output)
def test_flip_horizontal(self):
keypoints = tf.constant([
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
])
flip_permutation = [0, 2, 1]
expected_keypoints = tf.constant([
[[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]],
[[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]],
])
output = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_flip_vertical(self):
keypoints = tf.constant([
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
])
flip_permutation = [0, 2, 1]
expected_keypoints = tf.constant([
[[0.9, 0.1], [0.7, 0.3], [0.8, 0.2]],
[[0.6, 0.4], [0.4, 0.6], [0.5, 0.5]],
])
output = keypoint_ops.flip_vertical(keypoints, 0.5, flip_permutation)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_rot90(self):
keypoints = tf.constant([
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
[[0.4, 0.6], [0.5, 0.6], [0.6, 0.7]]
])
expected_keypoints = tf.constant([
[[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]],
[[0.4, 0.4], [0.4, 0.5], [0.3, 0.6]],
])
output = keypoint_ops.rot90(keypoints)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Classification and regression loss functions for object detection.
Localization losses:
* WeightedL2LocalizationLoss
* WeightedSmoothL1LocalizationLoss
* WeightedIOULocalizationLoss
Classification losses:
* WeightedSigmoidClassificationLoss
* WeightedSoftmaxClassificationLoss
* WeightedSoftmaxClassificationAgainstLogitsLoss
* BootstrappedSigmoidClassificationLoss
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.utils import ops
slim = tf.contrib.slim
class Loss(object):
"""Abstract base class for loss functions."""
__metaclass__ = ABCMeta
def __call__(self,
prediction_tensor,
target_tensor,
ignore_nan_targets=False,
scope=None,
**params):
"""Call the loss function.
Args:
prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
representing predicted quantities.
target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
regression or classification targets.
ignore_nan_targets: whether to ignore nan targets in the loss computation.
E.g. can be used if the target tensor is missing groundtruth data that
shouldn't be factored into the loss.
scope: Op scope name. Defaults to 'Loss' if None.
**params: Additional keyword arguments for specific implementations of
the Loss.
Returns:
loss: a tensor representing the value of the loss function.
"""
with tf.name_scope(scope, 'Loss',
[prediction_tensor, target_tensor, params]) as scope:
if ignore_nan_targets:
target_tensor = tf.where(tf.is_nan(target_tensor),
prediction_tensor,
target_tensor)
return self._compute_loss(prediction_tensor, target_tensor, **params)
@abstractmethod
def _compute_loss(self, prediction_tensor, target_tensor, **params):
"""Method to be overridden by implementations.
Args:
prediction_tensor: a tensor representing predicted quantities
target_tensor: a tensor representing regression or classification targets
**params: Additional keyword arguments for specific implementations of
the Loss.
Returns:
loss: an N-d tensor of shape [batch, anchors, ...] containing the loss per
anchor
"""
pass
class WeightedL2LocalizationLoss(Loss):
"""L2 localization loss function with anchorwise output support.
Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
"""
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the (encoded) predicted locations of objects.
target_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the regression targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a float tensor of shape [batch_size, num_anchors] tensor
representing the value of the loss function.
"""
weighted_diff = (prediction_tensor - target_tensor) * tf.expand_dims(
weights, 2)
square_diff = 0.5 * tf.square(weighted_diff)
return tf.reduce_sum(square_diff, 2)
class WeightedSmoothL1LocalizationLoss(Loss):
"""Smooth L1 localization loss function aka Huber Loss..
The smooth L1_loss is defined elementwise as .5 x^2 if |x| <= delta and
0.5 x^2 + delta * (|x|-delta) otherwise, where x is the difference between
predictions and target.
See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
"""
def __init__(self, delta=1.0):
"""Constructor.
Args:
delta: delta for smooth L1 loss.
"""
self._delta = delta
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the (encoded) predicted locations of objects.
target_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the regression targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a float tensor of shape [batch_size, num_anchors] tensor
representing the value of the loss function.
"""
return tf.reduce_sum(tf.losses.huber_loss(
target_tensor,
prediction_tensor,
delta=self._delta,
weights=tf.expand_dims(weights, axis=2),
loss_collection=None,
reduction=tf.losses.Reduction.NONE
), axis=2)
class WeightedIOULocalizationLoss(Loss):
"""IOU localization loss function.
Sums the IOU for corresponding pairs of predicted/groundtruth boxes
and for each pair assign a loss of 1 - IOU. We then compute a weighted
sum over all pairs which is returned as the total loss.
"""
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
representing the decoded predicted boxes
target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
representing the decoded target boxes
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a float tensor of shape [batch_size, num_anchors] tensor
representing the value of the loss function.
"""
predicted_boxes = box_list.BoxList(tf.reshape(prediction_tensor, [-1, 4]))
target_boxes = box_list.BoxList(tf.reshape(target_tensor, [-1, 4]))
per_anchor_iou_loss = 1.0 - box_list_ops.matched_iou(predicted_boxes,
target_boxes)
return tf.reshape(weights, [-1]) * per_anchor_iou_loss
class WeightedSigmoidClassificationLoss(Loss):
"""Sigmoid cross entropy classification loss function."""
def _compute_loss(self,
prediction_tensor,
target_tensor,
weights,
class_indices=None):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted logits for each class
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing one-hot encoded classification targets
weights: a float tensor of shape [batch_size, num_anchors]
class_indices: (Optional) A 1-D integer tensor of class indices.
If provided, computes loss only for the specified class indices.
Returns:
loss: a float tensor of shape [batch_size, num_anchors, num_classes]
representing the value of the loss function.
"""
weights = tf.expand_dims(weights, 2)
if class_indices is not None:
weights *= tf.reshape(
ops.indices_to_dense_vector(class_indices,
tf.shape(prediction_tensor)[2]),
[1, 1, -1])
per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
labels=target_tensor, logits=prediction_tensor))
return per_entry_cross_ent * weights
class SigmoidFocalClassificationLoss(Loss):
"""Sigmoid focal cross entropy loss.
Focal loss down-weights well classified examples and focusses on the hard
examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition.
"""
def __init__(self, gamma=2.0, alpha=0.25):
"""Constructor.
Args:
gamma: exponent of the modulating factor (1 - p_t) ^ gamma.
alpha: optional alpha weighting factor to balance positives vs negatives.
"""
self._alpha = alpha
self._gamma = gamma
def _compute_loss(self,
prediction_tensor,
target_tensor,
weights,
class_indices=None):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted logits for each class
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing one-hot encoded classification targets
weights: a float tensor of shape [batch_size, num_anchors]
class_indices: (Optional) A 1-D integer tensor of class indices.
If provided, computes loss only for the specified class indices.
Returns:
loss: a float tensor of shape [batch_size, num_anchors, num_classes]
representing the value of the loss function.
"""
weights = tf.expand_dims(weights, 2)
if class_indices is not None:
weights *= tf.reshape(
ops.indices_to_dense_vector(class_indices,
tf.shape(prediction_tensor)[2]),
[1, 1, -1])
per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
labels=target_tensor, logits=prediction_tensor))
prediction_probabilities = tf.sigmoid(prediction_tensor)
p_t = ((target_tensor * prediction_probabilities) +
((1 - target_tensor) * (1 - prediction_probabilities)))
modulating_factor = 1.0
if self._gamma:
modulating_factor = tf.pow(1.0 - p_t, self._gamma)
alpha_weight_factor = 1.0
if self._alpha is not None:
alpha_weight_factor = (target_tensor * self._alpha +
(1 - target_tensor) * (1 - self._alpha))
focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor *
per_entry_cross_ent)
return focal_cross_entropy_loss * weights
class WeightedSoftmaxClassificationLoss(Loss):
"""Softmax loss function."""
def __init__(self, logit_scale=1.0):
"""Constructor.
Args:
logit_scale: When this value is high, the prediction is "diffused" and
when this value is low, the prediction is made peakier.
(default 1.0)
"""
self._logit_scale = logit_scale
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted logits for each class
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing one-hot encoded classification targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a float tensor of shape [batch_size, num_anchors]
representing the value of the loss function.
"""
num_classes = prediction_tensor.get_shape().as_list()[-1]
prediction_tensor = tf.divide(
prediction_tensor, self._logit_scale, name='scale_logit')
per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
labels=tf.reshape(target_tensor, [-1, num_classes]),
logits=tf.reshape(prediction_tensor, [-1, num_classes])))
return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
class WeightedSoftmaxClassificationAgainstLogitsLoss(Loss):
"""Softmax loss function against logits.
Targets are expected to be provided in logits space instead of "one hot" or
"probability distribution" space.
"""
def __init__(self, logit_scale=1.0):
"""Constructor.
Args:
logit_scale: When this value is high, the target is "diffused" and
when this value is low, the target is made peakier.
(default 1.0)
"""
self._logit_scale = logit_scale
def _scale_and_softmax_logits(self, logits):
"""Scale logits then apply softmax."""
scaled_logits = tf.divide(logits, self._logit_scale, name='scale_logits')
return tf.nn.softmax(scaled_logits, name='convert_scores')
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted logits for each class
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing logit classification targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a float tensor of shape [batch_size, num_anchors]
representing the value of the loss function.
"""
num_classes = prediction_tensor.get_shape().as_list()[-1]
target_tensor = self._scale_and_softmax_logits(target_tensor)
prediction_tensor = tf.divide(prediction_tensor, self._logit_scale,
name='scale_logits')
per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
labels=tf.reshape(target_tensor, [-1, num_classes]),
logits=tf.reshape(prediction_tensor, [-1, num_classes])))
return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
class BootstrappedSigmoidClassificationLoss(Loss):
"""Bootstrapped sigmoid cross entropy classification loss function.
This loss uses a convex combination of training labels and the current model's
predictions as training targets in the classification loss. The idea is that
as the model improves over time, its predictions can be trusted more and we
can use these predictions to mitigate the damage of noisy/incorrect labels,
because incorrect labels are likely to be eventually highly inconsistent with
other stimuli predicted to have the same label by the model.
In "soft" bootstrapping, we use all predicted class probabilities, whereas in
"hard" bootstrapping, we use the single class favored by the model.
See also Training Deep Neural Networks On Noisy Labels with Bootstrapping by
Reed et al. (ICLR 2015).
"""
def __init__(self, alpha, bootstrap_type='soft'):
"""Constructor.
Args:
alpha: a float32 scalar tensor between 0 and 1 representing interpolation
weight
bootstrap_type: set to either 'hard' or 'soft' (default)
Raises:
ValueError: if bootstrap_type is not either 'hard' or 'soft'
"""
if bootstrap_type != 'hard' and bootstrap_type != 'soft':
raise ValueError('Unrecognized bootstrap_type: must be one of '
'\'hard\' or \'soft.\'')
self._alpha = alpha
self._bootstrap_type = bootstrap_type
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted logits for each class
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing one-hot encoded classification targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a float tensor of shape [batch_size, num_anchors, num_classes]
representing the value of the loss function.
"""
if self._bootstrap_type == 'soft':
bootstrap_target_tensor = self._alpha * target_tensor + (
1.0 - self._alpha) * tf.sigmoid(prediction_tensor)
else:
bootstrap_target_tensor = self._alpha * target_tensor + (
1.0 - self._alpha) * tf.cast(
tf.sigmoid(prediction_tensor) > 0.5, tf.float32)
per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
labels=bootstrap_target_tensor, logits=prediction_tensor))
return per_entry_cross_ent * tf.expand_dims(weights, 2)
class HardExampleMiner(object):
"""Hard example mining for regions in a list of images.
Implements hard example mining to select a subset of regions to be
back-propagated. For each image, selects the regions with highest losses,
subject to the condition that a newly selected region cannot have
an IOU > iou_threshold with any of the previously selected regions.
This can be achieved by re-using a greedy non-maximum suppression algorithm.
A constraint on the number of negatives mined per positive region can also be
enforced.
Reference papers: "Training Region-based Object Detectors with Online
Hard Example Mining" (CVPR 2016) by Srivastava et al., and
"SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al.
"""
def __init__(self,
num_hard_examples=64,
iou_threshold=0.7,
loss_type='both',
cls_loss_weight=0.05,
loc_loss_weight=0.06,
max_negatives_per_positive=None,
min_negatives_per_image=0):
"""Constructor.
The hard example mining implemented by this class can replicate the behavior
in the two aforementioned papers (Srivastava et al., and Liu et al).
To replicate the A2 paper (Srivastava et al), num_hard_examples is set
to a fixed parameter (64 by default) and iou_threshold is set to .7 for
running non-max-suppression the predicted boxes prior to hard mining.
In order to replicate the SSD paper (Liu et al), num_hard_examples should
be set to None, max_negatives_per_positive should be 3 and iou_threshold
should be 1.0 (in order to effectively turn off NMS).
Args:
num_hard_examples: maximum number of hard examples to be
selected per image (prior to enforcing max negative to positive ratio
constraint). If set to None, all examples obtained after NMS are
considered.
iou_threshold: minimum intersection over union for an example
to be discarded during NMS.
loss_type: use only classification losses ('cls', default),
localization losses ('loc') or both losses ('both').
In the last case, cls_loss_weight and loc_loss_weight are used to
compute weighted sum of the two losses.
cls_loss_weight: weight for classification loss.
loc_loss_weight: weight for location loss.
max_negatives_per_positive: maximum number of negatives to retain for
each positive anchor. By default, num_negatives_per_positive is None,
which means that we do not enforce a prespecified negative:positive
ratio. Note also that num_negatives_per_positives can be a float
(and will be converted to be a float even if it is passed in otherwise).
min_negatives_per_image: minimum number of negative anchors to sample for
a given image. Setting this to a positive number allows sampling
negatives in an image without any positive anchors and thus not biased
towards at least one detection per image.
"""
self._num_hard_examples = num_hard_examples
self._iou_threshold = iou_threshold
self._loss_type = loss_type
self._cls_loss_weight = cls_loss_weight
self._loc_loss_weight = loc_loss_weight
self._max_negatives_per_positive = max_negatives_per_positive
self._min_negatives_per_image = min_negatives_per_image
if self._max_negatives_per_positive is not None:
self._max_negatives_per_positive = float(self._max_negatives_per_positive)
self._num_positives_list = None
self._num_negatives_list = None
def __call__(self,
location_losses,
cls_losses,
decoded_boxlist_list,
match_list=None):
"""Computes localization and classification losses after hard mining.
Args:
location_losses: a float tensor of shape [num_images, num_anchors]
representing anchorwise localization losses.
cls_losses: a float tensor of shape [num_images, num_anchors]
representing anchorwise classification losses.
decoded_boxlist_list: a list of decoded BoxList representing location
predictions for each image.
match_list: an optional list of matcher.Match objects encoding the match
between anchors and groundtruth boxes for each image of the batch,
with rows of the Match objects corresponding to groundtruth boxes
and columns corresponding to anchors. Match objects in match_list are
used to reference which anchors are positive, negative or ignored. If
self._max_negatives_per_positive exists, these are then used to enforce
a prespecified negative to positive ratio.
Returns:
mined_location_loss: a float scalar with sum of localization losses from
selected hard examples.
mined_cls_loss: a float scalar with sum of classification losses from
selected hard examples.
Raises:
ValueError: if location_losses, cls_losses and decoded_boxlist_list do
not have compatible shapes (i.e., they must correspond to the same
number of images).
ValueError: if match_list is specified but its length does not match
len(decoded_boxlist_list).
"""
mined_location_losses = []
mined_cls_losses = []
location_losses = tf.unstack(location_losses)
cls_losses = tf.unstack(cls_losses)
num_images = len(decoded_boxlist_list)
if not match_list:
match_list = num_images * [None]
if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses):
raise ValueError('location_losses, cls_losses and decoded_boxlist_list '
'do not have compatible shapes.')
if not isinstance(match_list, list):
raise ValueError('match_list must be a list.')
if len(match_list) != len(decoded_boxlist_list):
raise ValueError('match_list must either be None or have '
'length=len(decoded_boxlist_list).')
num_positives_list = []
num_negatives_list = []
for ind, detection_boxlist in enumerate(decoded_boxlist_list):
box_locations = detection_boxlist.get()
match = match_list[ind]
image_losses = cls_losses[ind]
if self._loss_type == 'loc':
image_losses = location_losses[ind]
elif self._loss_type == 'both':
image_losses *= self._cls_loss_weight
image_losses += location_losses[ind] * self._loc_loss_weight
if self._num_hard_examples is not None:
num_hard_examples = self._num_hard_examples
else:
num_hard_examples = detection_boxlist.num_boxes()
selected_indices = tf.image.non_max_suppression(
box_locations, image_losses, num_hard_examples, self._iou_threshold)
if self._max_negatives_per_positive is not None and match:
(selected_indices, num_positives,
num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio(
selected_indices, match, self._max_negatives_per_positive,
self._min_negatives_per_image)
num_positives_list.append(num_positives)
num_negatives_list.append(num_negatives)
mined_location_losses.append(
tf.reduce_sum(tf.gather(location_losses[ind], selected_indices)))
mined_cls_losses.append(
tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices)))
location_loss = tf.reduce_sum(tf.stack(mined_location_losses))
cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses))
if match and self._max_negatives_per_positive:
self._num_positives_list = num_positives_list
self._num_negatives_list = num_negatives_list
return (location_loss, cls_loss)
def summarize(self):
"""Summarize the number of positives and negatives after mining."""
if self._num_positives_list and self._num_negatives_list:
avg_num_positives = tf.reduce_mean(tf.to_float(self._num_positives_list))
avg_num_negatives = tf.reduce_mean(tf.to_float(self._num_negatives_list))
tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives)
tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives)
def _subsample_selection_to_desired_neg_pos_ratio(self,
indices,
match,
max_negatives_per_positive,
min_negatives_per_image=0):
"""Subsample a collection of selected indices to a desired neg:pos ratio.
This function takes a subset of M indices (indexing into a large anchor
collection of N anchors where M<N) which are labeled as positive/negative
via a Match object (matched indices are positive, unmatched indices
are negative). It returns a subset of the provided indices retaining all
positives as well as up to the first K negatives, where:
K=floor(num_negative_per_positive * num_positives).
For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors),
with positives=[2, 5] and negatives=[4, 7, 9, 10] and
num_negatives_per_positive=1, then the returned subset of indices
is [2, 4, 5, 7].
Args:
indices: An integer tensor of shape [M] representing a collection
of selected anchor indices
match: A matcher.Match object encoding the match between anchors and
groundtruth boxes for a given image, with rows of the Match objects
corresponding to groundtruth boxes and columns corresponding to anchors.
max_negatives_per_positive: (float) maximum number of negatives for
each positive anchor.
min_negatives_per_image: minimum number of negative anchors for a given
image. Allow sampling negatives in image without any positive anchors.
Returns:
selected_indices: An integer tensor of shape [M'] representing a
collection of selected anchor indices with M' <= M.
num_positives: An integer tensor representing the number of positive
examples in selected set of indices.
num_negatives: An integer tensor representing the number of negative
examples in selected set of indices.
"""
positives_indicator = tf.gather(match.matched_column_indicator(), indices)
negatives_indicator = tf.gather(match.unmatched_column_indicator(), indices)
num_positives = tf.reduce_sum(tf.to_int32(positives_indicator))
max_negatives = tf.maximum(min_negatives_per_image,
tf.to_int32(max_negatives_per_positive *
tf.to_float(num_positives)))
topk_negatives_indicator = tf.less_equal(
tf.cumsum(tf.to_int32(negatives_indicator)), max_negatives)
subsampled_selection_indices = tf.where(
tf.logical_or(positives_indicator, topk_negatives_indicator))
num_negatives = tf.size(subsampled_selection_indices) - num_positives
return (tf.reshape(tf.gather(indices, subsampled_selection_indices), [-1]),
num_positives, num_negatives)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for google3.research.vale.object_detection.losses."""
import math
import numpy as np
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import losses
from object_detection.core import matcher
class WeightedL2LocalizationLossTest(tf.test.TestCase):
def testReturnsCorrectWeightedLoss(self):
batch_size = 3
num_anchors = 10
code_size = 4
prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
target_tensor = tf.zeros([batch_size, num_anchors, code_size])
weights = tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0, 0, 0]], tf.float32)
loss_op = losses.WeightedL2LocalizationLoss()
loss = tf.reduce_sum(loss_op(prediction_tensor, target_tensor,
weights=weights))
expected_loss = (3 * 5 * 4) / 2.0
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, expected_loss)
def testReturnsCorrectAnchorwiseLoss(self):
batch_size = 3
num_anchors = 16
code_size = 4
prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
target_tensor = tf.zeros([batch_size, num_anchors, code_size])
weights = tf.ones([batch_size, num_anchors])
loss_op = losses.WeightedL2LocalizationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
expected_loss = np.ones((batch_size, num_anchors)) * 2
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, expected_loss)
def testReturnsCorrectNanLoss(self):
batch_size = 3
num_anchors = 10
code_size = 4
prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
target_tensor = tf.concat([
tf.zeros([batch_size, num_anchors, code_size / 2]),
tf.ones([batch_size, num_anchors, code_size / 2]) * np.nan
], axis=2)
weights = tf.ones([batch_size, num_anchors])
loss_op = losses.WeightedL2LocalizationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights,
ignore_nan_targets=True)
loss = tf.reduce_sum(loss)
expected_loss = (3 * 5 * 4) / 2.0
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, expected_loss)
class WeightedSmoothL1LocalizationLossTest(tf.test.TestCase):
def testReturnsCorrectLoss(self):
batch_size = 2
num_anchors = 3
code_size = 4
prediction_tensor = tf.constant([[[2.5, 0, .4, 0],
[0, 0, 0, 0],
[0, 2.5, 0, .4]],
[[3.5, 0, 0, 0],
[0, .4, 0, .9],
[0, 0, 1.5, 0]]], tf.float32)
target_tensor = tf.zeros([batch_size, num_anchors, code_size])
weights = tf.constant([[2, 1, 1],
[0, 3, 0]], tf.float32)
loss_op = losses.WeightedSmoothL1LocalizationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
loss = tf.reduce_sum(loss)
exp_loss = 7.695
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
class WeightedIOULocalizationLossTest(tf.test.TestCase):
def testReturnsCorrectLoss(self):
prediction_tensor = tf.constant([[[1.5, 0, 2.4, 1],
[0, 0, 1, 1],
[0, 0, .5, .25]]])
target_tensor = tf.constant([[[1.5, 0, 2.4, 1],
[0, 0, 1, 1],
[50, 50, 500.5, 100.25]]])
weights = [[1.0, .5, 2.0]]
loss_op = losses.WeightedIOULocalizationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
loss = tf.reduce_sum(loss)
exp_loss = 2.0
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
def testReturnsCorrectLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[100, 0, -100],
[-100, -100, 100]],
[[-100, 0, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
loss_op = losses.WeightedSigmoidClassificationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
loss = tf.reduce_sum(loss)
exp_loss = -2 * math.log(.5)
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectAnchorWiseLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[100, 0, -100],
[-100, -100, 100]],
[[-100, 0, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
loss_op = losses.WeightedSigmoidClassificationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
loss = tf.reduce_sum(loss, axis=2)
exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
[-math.log(.5), 0, 0, 0]])
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectLossWithClassIndices(self):
prediction_tensor = tf.constant([[[-100, 100, -100, 100],
[100, -100, -100, -100],
[100, 0, -100, 100],
[-100, -100, 100, -100]],
[[-100, 0, 100, 100],
[-100, 100, -100, 100],
[100, 100, 100, 100],
[0, 0, -1, 100]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0, 0],
[1, 0, 0, 1],
[1, 0, 0, 0],
[0, 0, 1, 1]],
[[0, 0, 1, 0],
[0, 1, 0, 0],
[1, 1, 1, 0],
[1, 0, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
# Ignores the last class.
class_indices = tf.constant([0, 1, 2], tf.int32)
loss_op = losses.WeightedSigmoidClassificationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights,
class_indices=class_indices)
loss = tf.reduce_sum(loss, axis=2)
exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
[-math.log(.5), 0, 0, 0]])
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def _logit(probability):
return math.log(probability / (1. - probability))
class SigmoidFocalClassificationLossTest(tf.test.TestCase):
def testEasyExamplesProduceSmallLossComparedToSigmoidXEntropy(self):
prediction_tensor = tf.constant([[[_logit(0.97)],
[_logit(0.90)],
[_logit(0.73)],
[_logit(0.27)],
[_logit(0.09)],
[_logit(0.03)]]], tf.float32)
target_tensor = tf.constant([[[1],
[1],
[1],
[0],
[0],
[0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1, 1, 1]], tf.float32)
focal_loss_op = losses.SigmoidFocalClassificationLoss(gamma=2.0, alpha=None)
sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss()
focal_loss = tf.reduce_sum(focal_loss_op(prediction_tensor, target_tensor,
weights=weights), axis=2)
sigmoid_loss = tf.reduce_sum(sigmoid_loss_op(prediction_tensor,
target_tensor,
weights=weights), axis=2)
with self.test_session() as sess:
sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
order_of_ratio = np.power(10,
np.floor(np.log10(sigmoid_loss / focal_loss)))
self.assertAllClose(order_of_ratio, [[1000, 100, 10, 10, 100, 1000]])
def testHardExamplesProduceLossComparableToSigmoidXEntropy(self):
prediction_tensor = tf.constant([[[_logit(0.55)],
[_logit(0.52)],
[_logit(0.50)],
[_logit(0.48)],
[_logit(0.45)]]], tf.float32)
target_tensor = tf.constant([[[1],
[1],
[1],
[0],
[0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
focal_loss_op = losses.SigmoidFocalClassificationLoss(gamma=2.0, alpha=None)
sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss()
focal_loss = tf.reduce_sum(focal_loss_op(prediction_tensor, target_tensor,
weights=weights), axis=2)
sigmoid_loss = tf.reduce_sum(sigmoid_loss_op(prediction_tensor,
target_tensor,
weights=weights), axis=2)
with self.test_session() as sess:
sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
order_of_ratio = np.power(10,
np.floor(np.log10(sigmoid_loss / focal_loss)))
self.assertAllClose(order_of_ratio, [[1., 1., 1., 1., 1.]])
def testNonAnchorWiseOutputComparableToSigmoidXEntropy(self):
prediction_tensor = tf.constant([[[_logit(0.55)],
[_logit(0.52)],
[_logit(0.50)],
[_logit(0.48)],
[_logit(0.45)]]], tf.float32)
target_tensor = tf.constant([[[1],
[1],
[1],
[0],
[0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
focal_loss_op = losses.SigmoidFocalClassificationLoss(gamma=2.0, alpha=None)
sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss()
focal_loss = tf.reduce_sum(focal_loss_op(prediction_tensor, target_tensor,
weights=weights))
sigmoid_loss = tf.reduce_sum(sigmoid_loss_op(prediction_tensor,
target_tensor,
weights=weights))
with self.test_session() as sess:
sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
order_of_ratio = np.power(10,
np.floor(np.log10(sigmoid_loss / focal_loss)))
self.assertAlmostEqual(order_of_ratio, 1.)
def testIgnoreNegativeExampleLossViaAlphaMultiplier(self):
prediction_tensor = tf.constant([[[_logit(0.55)],
[_logit(0.52)],
[_logit(0.50)],
[_logit(0.48)],
[_logit(0.45)]]], tf.float32)
target_tensor = tf.constant([[[1],
[1],
[1],
[0],
[0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
focal_loss_op = losses.SigmoidFocalClassificationLoss(gamma=2.0, alpha=1.0)
sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss()
focal_loss = tf.reduce_sum(focal_loss_op(prediction_tensor, target_tensor,
weights=weights), axis=2)
sigmoid_loss = tf.reduce_sum(sigmoid_loss_op(prediction_tensor,
target_tensor,
weights=weights), axis=2)
with self.test_session() as sess:
sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
self.assertAllClose(focal_loss[0][3:], [0., 0.])
order_of_ratio = np.power(10,
np.floor(np.log10(sigmoid_loss[0][:3] /
focal_loss[0][:3])))
self.assertAllClose(order_of_ratio, [1., 1., 1.])
def testIgnorePositiveExampleLossViaAlphaMultiplier(self):
prediction_tensor = tf.constant([[[_logit(0.55)],
[_logit(0.52)],
[_logit(0.50)],
[_logit(0.48)],
[_logit(0.45)]]], tf.float32)
target_tensor = tf.constant([[[1],
[1],
[1],
[0],
[0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
focal_loss_op = losses.SigmoidFocalClassificationLoss(gamma=2.0, alpha=0.0)
sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss()
focal_loss = tf.reduce_sum(focal_loss_op(prediction_tensor, target_tensor,
weights=weights), axis=2)
sigmoid_loss = tf.reduce_sum(sigmoid_loss_op(prediction_tensor,
target_tensor,
weights=weights), axis=2)
with self.test_session() as sess:
sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
self.assertAllClose(focal_loss[0][:3], [0., 0., 0.])
order_of_ratio = np.power(10,
np.floor(np.log10(sigmoid_loss[0][3:] /
focal_loss[0][3:])))
self.assertAllClose(order_of_ratio, [1., 1.])
def testSimilarToSigmoidXEntropyWithHalfAlphaAndZeroGammaUpToAScale(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[100, 0, -100],
[-100, -100, 100]],
[[-100, 0, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
focal_loss_op = losses.SigmoidFocalClassificationLoss(alpha=0.5, gamma=0.0)
sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss()
focal_loss = focal_loss_op(prediction_tensor, target_tensor,
weights=weights)
sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
weights=weights)
with self.test_session() as sess:
sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
self.assertAllClose(sigmoid_loss, focal_loss * 2)
def testSameAsSigmoidXEntropyWithNoAlphaAndZeroGamma(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[100, 0, -100],
[-100, -100, 100]],
[[-100, 0, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
focal_loss_op = losses.SigmoidFocalClassificationLoss(alpha=None, gamma=0.0)
sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss()
focal_loss = focal_loss_op(prediction_tensor, target_tensor,
weights=weights)
sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
weights=weights)
with self.test_session() as sess:
sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
self.assertAllClose(sigmoid_loss, focal_loss)
def testExpectedLossWithAlphaOneAndZeroGamma(self):
# All zeros correspond to 0.5 probability.
prediction_tensor = tf.constant([[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]],
[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 0, 0],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 1]], tf.float32)
focal_loss_op = losses.SigmoidFocalClassificationLoss(alpha=1.0, gamma=0.0)
focal_loss = tf.reduce_sum(focal_loss_op(prediction_tensor, target_tensor,
weights=weights))
with self.test_session() as sess:
focal_loss = sess.run(focal_loss)
self.assertAllClose(
(-math.log(.5) * # x-entropy per class per anchor
1.0 * # alpha
8), # positives from 8 anchors
focal_loss)
def testExpectedLossWithAlpha75AndZeroGamma(self):
# All zeros correspond to 0.5 probability.
prediction_tensor = tf.constant([[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]],
[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 0, 0],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 1]], tf.float32)
focal_loss_op = losses.SigmoidFocalClassificationLoss(alpha=0.75, gamma=0.0)
focal_loss = tf.reduce_sum(focal_loss_op(prediction_tensor, target_tensor,
weights=weights))
with self.test_session() as sess:
focal_loss = sess.run(focal_loss)
self.assertAllClose(
(-math.log(.5) * # x-entropy per class per anchor.
((0.75 * # alpha for positives.
8) + # positives from 8 anchors.
(0.25 * # alpha for negatives.
8 * 2))), # negatives from 8 anchors for two classes.
focal_loss)
class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
def testReturnsCorrectLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[0, 0, -100],
[-100, -100, 100]],
[[-100, 0, 0],
[-100, 100, -100],
[-100, 100, -100],
[100, -100, -100]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[0, 1, 0],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, .5, 1],
[1, 1, 1, 0]], tf.float32)
loss_op = losses.WeightedSoftmaxClassificationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
loss = tf.reduce_sum(loss)
exp_loss = - 1.5 * math.log(.5)
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectAnchorWiseLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[0, 0, -100],
[-100, -100, 100]],
[[-100, 0, 0],
[-100, 100, -100],
[-100, 100, -100],
[100, -100, -100]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[0, 1, 0],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, .5, 1],
[1, 1, 1, 0]], tf.float32)
loss_op = losses.WeightedSoftmaxClassificationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
exp_loss = np.matrix([[0, 0, - 0.5 * math.log(.5), 0],
[-math.log(.5), 0, 0, 0]])
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectAnchorWiseLossWithHighLogitScaleSetting(self):
"""At very high logit_scale, all predictions will be ~0.33."""
# TODO(yonib): Also test logit_scale with anchorwise=False.
logit_scale = 10e16
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[0, 0, -100],
[-100, -100, 100]],
[[-100, 0, 0],
[-100, 100, -100],
[-100, 100, -100],
[100, -100, -100]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[0, 1, 0],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 1]], tf.float32)
loss_op = losses.WeightedSoftmaxClassificationLoss(logit_scale=logit_scale)
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
uniform_distribution_loss = - math.log(.33333333333)
exp_loss = np.matrix([[uniform_distribution_loss] * 4,
[uniform_distribution_loss] * 4])
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
class WeightedSoftmaxClassificationAgainstLogitsLossTest(tf.test.TestCase):
def testReturnsCorrectLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[0, 0, -100],
[-100, -100, 100]],
[[-100, 0, 0],
[-100, 100, -100],
[-100, 100, -100],
[100, -100, -100]]], tf.float32)
target_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[100, -100, -100],
[-100, -100, 100]],
[[-100, -100, 100],
[-100, 100, -100],
[-100, 100, -100],
[100, -100, -100]]], tf.float32)
weights = tf.constant([[1, 1, .5, 1],
[1, 1, 1, 1]], tf.float32)
loss_op = losses.WeightedSoftmaxClassificationAgainstLogitsLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
loss = tf.reduce_sum(loss)
exp_loss = - 1.5 * math.log(.5)
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectAnchorWiseLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[0, 0, -100],
[-100, -100, 100]],
[[-100, 0, 0],
[-100, 100, -100],
[-100, 100, -100],
[100, -100, -100]]], tf.float32)
target_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[100, -100, -100],
[-100, -100, 100]],
[[-100, -100, 100],
[-100, 100, -100],
[-100, 100, -100],
[100, -100, -100]]], tf.float32)
weights = tf.constant([[1, 1, .5, 1],
[1, 1, 1, 0]], tf.float32)
loss_op = losses.WeightedSoftmaxClassificationAgainstLogitsLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
exp_loss = np.matrix([[0, 0, - 0.5 * math.log(.5), 0],
[-math.log(.5), 0, 0, 0]])
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectAnchorWiseLossWithLogitScaleSetting(self):
logit_scale = 100.
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[0, 0, -100],
[-100, -100, 100]],
[[-100, 0, 0],
[-100, 100, -100],
[-100, 100, -100],
[100, -100, -100]]], tf.float32)
target_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[0, 0, -100],
[-100, -100, 100]],
[[-100, 0, 0],
[-100, 100, -100],
[-100, 100, -100],
[100, -100, -100]]], tf.float32)
weights = tf.constant([[1, 1, .5, 1],
[1, 1, 1, 0]], tf.float32)
loss_op = losses.WeightedSoftmaxClassificationAgainstLogitsLoss(
logit_scale=logit_scale)
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
# find softmax of the two prediction types above
softmax_pred1 = [np.exp(-1), np.exp(-1), np.exp(1)]
softmax_pred1 /= sum(softmax_pred1)
softmax_pred2 = [np.exp(0), np.exp(0), np.exp(-1)]
softmax_pred2 /= sum(softmax_pred2)
# compute the expected cross entropy for perfect matches
exp_entropy1 = sum(
[-x*np.log(x) for x in softmax_pred1])
exp_entropy2 = sum(
[-x*np.log(x) for x in softmax_pred2])
# weighted expected losses
exp_loss = np.matrix(
[[exp_entropy1, exp_entropy1, exp_entropy2*.5, exp_entropy1],
[exp_entropy2, exp_entropy1, exp_entropy1, 0.]])
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):
def testReturnsCorrectLossSoftBootstrapping(self):
prediction_tensor = tf.constant([[[-100, 100, 0],
[100, -100, -100],
[100, -100, -100],
[-100, -100, 100]],
[[-100, -100, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
alpha = tf.constant(.5, tf.float32)
loss_op = losses.BootstrappedSigmoidClassificationLoss(
alpha, bootstrap_type='soft')
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
loss = tf.reduce_sum(loss)
exp_loss = -math.log(.5)
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectLossHardBootstrapping(self):
prediction_tensor = tf.constant([[[-100, 100, 0],
[100, -100, -100],
[100, -100, -100],
[-100, -100, 100]],
[[-100, -100, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
alpha = tf.constant(.5, tf.float32)
loss_op = losses.BootstrappedSigmoidClassificationLoss(
alpha, bootstrap_type='hard')
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
loss = tf.reduce_sum(loss)
exp_loss = -math.log(.5)
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectAnchorWiseLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[100, 0, -100],
[-100, -100, 100]],
[[-100, 0, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
alpha = tf.constant(.5, tf.float32)
loss_op = losses.BootstrappedSigmoidClassificationLoss(
alpha, bootstrap_type='hard')
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
loss = tf.reduce_sum(loss, axis=2)
exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
[-math.log(.5), 0, 0, 0]])
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
class HardExampleMinerTest(tf.test.TestCase):
def testHardMiningWithSingleLossType(self):
location_losses = tf.constant([[100, 90, 80, 0],
[0, 1, 2, 3]], tf.float32)
cls_losses = tf.constant([[0, 10, 50, 110],
[9, 6, 3, 0]], tf.float32)
box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9]], tf.float32)
decoded_boxlist_list = []
decoded_boxlist_list.append(box_list.BoxList(box_corners))
decoded_boxlist_list.append(box_list.BoxList(box_corners))
# Uses only location loss to select hard examples
loss_op = losses.HardExampleMiner(num_hard_examples=1,
iou_threshold=0.0,
loss_type='loc',
cls_loss_weight=1,
loc_loss_weight=1)
(loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
decoded_boxlist_list)
exp_loc_loss = 100 + 3
exp_cls_loss = 0 + 0
with self.test_session() as sess:
loc_loss_output = sess.run(loc_loss)
self.assertAllClose(loc_loss_output, exp_loc_loss)
cls_loss_output = sess.run(cls_loss)
self.assertAllClose(cls_loss_output, exp_cls_loss)
def testHardMiningWithBothLossType(self):
location_losses = tf.constant([[100, 90, 80, 0],
[0, 1, 2, 3]], tf.float32)
cls_losses = tf.constant([[0, 10, 50, 110],
[9, 6, 3, 0]], tf.float32)
box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9]], tf.float32)
decoded_boxlist_list = []
decoded_boxlist_list.append(box_list.BoxList(box_corners))
decoded_boxlist_list.append(box_list.BoxList(box_corners))
loss_op = losses.HardExampleMiner(num_hard_examples=1,
iou_threshold=0.0,
loss_type='both',
cls_loss_weight=1,
loc_loss_weight=1)
(loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
decoded_boxlist_list)
exp_loc_loss = 80 + 0
exp_cls_loss = 50 + 9
with self.test_session() as sess:
loc_loss_output = sess.run(loc_loss)
self.assertAllClose(loc_loss_output, exp_loc_loss)
cls_loss_output = sess.run(cls_loss)
self.assertAllClose(cls_loss_output, exp_cls_loss)
def testHardMiningNMS(self):
location_losses = tf.constant([[100, 90, 80, 0],
[0, 1, 2, 3]], tf.float32)
cls_losses = tf.constant([[0, 10, 50, 110],
[9, 6, 3, 0]], tf.float32)
box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
[0.9, 0.9, 0.99, 0.99],
[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9]], tf.float32)
decoded_boxlist_list = []
decoded_boxlist_list.append(box_list.BoxList(box_corners))
decoded_boxlist_list.append(box_list.BoxList(box_corners))
loss_op = losses.HardExampleMiner(num_hard_examples=2,
iou_threshold=0.5,
loss_type='cls',
cls_loss_weight=1,
loc_loss_weight=1)
(loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
decoded_boxlist_list)
exp_loc_loss = 0 + 90 + 0 + 1
exp_cls_loss = 110 + 10 + 9 + 6
with self.test_session() as sess:
loc_loss_output = sess.run(loc_loss)
self.assertAllClose(loc_loss_output, exp_loc_loss)
cls_loss_output = sess.run(cls_loss)
self.assertAllClose(cls_loss_output, exp_cls_loss)
def testEnforceNegativesPerPositiveRatio(self):
location_losses = tf.constant([[100, 90, 80, 0, 1, 2,
3, 10, 20, 100, 20, 3]], tf.float32)
cls_losses = tf.constant([[0, 0, 100, 0, 90, 70,
0, 60, 0, 17, 13, 0]], tf.float32)
box_corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.5, 0.1],
[0.0, 0.0, 0.6, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.8, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 1.0, 0.1],
[0.0, 0.0, 1.1, 0.1],
[0.0, 0.0, 0.2, 0.1]], tf.float32)
match_results = tf.constant([2, -1, 0, -1, -1, 1, -1, -1, -1, -1, -1, 3])
match_list = [matcher.Match(match_results)]
decoded_boxlist_list = []
decoded_boxlist_list.append(box_list.BoxList(box_corners))
max_negatives_per_positive_list = [0.0, 0.5, 1.0, 1.5, 10]
exp_loc_loss_list = [80 + 2,
80 + 1 + 2,
80 + 1 + 2 + 10,
80 + 1 + 2 + 10 + 100,
80 + 1 + 2 + 10 + 100 + 20]
exp_cls_loss_list = [100 + 70,
100 + 90 + 70,
100 + 90 + 70 + 60,
100 + 90 + 70 + 60 + 17,
100 + 90 + 70 + 60 + 17 + 13]
for max_negatives_per_positive, exp_loc_loss, exp_cls_loss in zip(
max_negatives_per_positive_list, exp_loc_loss_list, exp_cls_loss_list):
loss_op = losses.HardExampleMiner(
num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
cls_loss_weight=1, loc_loss_weight=1,
max_negatives_per_positive=max_negatives_per_positive)
(loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
decoded_boxlist_list, match_list)
loss_op.summarize()
with self.test_session() as sess:
loc_loss_output = sess.run(loc_loss)
self.assertAllClose(loc_loss_output, exp_loc_loss)
cls_loss_output = sess.run(cls_loss)
self.assertAllClose(cls_loss_output, exp_cls_loss)
def testEnforceNegativesPerPositiveRatioWithMinNegativesPerImage(self):
location_losses = tf.constant([[100, 90, 80, 0, 1, 2,
3, 10, 20, 100, 20, 3]], tf.float32)
cls_losses = tf.constant([[0, 0, 100, 0, 90, 70,
0, 60, 0, 17, 13, 0]], tf.float32)
box_corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.5, 0.1],
[0.0, 0.0, 0.6, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.8, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 1.0, 0.1],
[0.0, 0.0, 1.1, 0.1],
[0.0, 0.0, 0.2, 0.1]], tf.float32)
match_results = tf.constant([-1] * 12)
match_list = [matcher.Match(match_results)]
decoded_boxlist_list = []
decoded_boxlist_list.append(box_list.BoxList(box_corners))
min_negatives_per_image_list = [0, 1, 2, 4, 5, 6]
exp_loc_loss_list = [0,
80,
80 + 1,
80 + 1 + 2 + 10,
80 + 1 + 2 + 10 + 100,
80 + 1 + 2 + 10 + 100 + 20]
exp_cls_loss_list = [0,
100,
100 + 90,
100 + 90 + 70 + 60,
100 + 90 + 70 + 60 + 17,
100 + 90 + 70 + 60 + 17 + 13]
for min_negatives_per_image, exp_loc_loss, exp_cls_loss in zip(
min_negatives_per_image_list, exp_loc_loss_list, exp_cls_loss_list):
loss_op = losses.HardExampleMiner(
num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
cls_loss_weight=1, loc_loss_weight=1,
max_negatives_per_positive=3,
min_negatives_per_image=min_negatives_per_image)
(loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
decoded_boxlist_list, match_list)
with self.test_session() as sess:
loc_loss_output = sess.run(loc_loss)
self.assertAllClose(loc_loss_output, exp_loc_loss)
cls_loss_output = sess.run(cls_loss)
self.assertAllClose(cls_loss_output, exp_cls_loss)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Matcher interface and Match class.
This module defines the Matcher interface and the Match object. The job of the
matcher is to match row and column indices based on the similarity matrix and
other optional parameters. Each column is matched to at most one row. There
are three possibilities for the matching:
1) match: A column matches a row.
2) no_match: A column does not match any row.
3) ignore: A column that is neither 'match' nor no_match.
The ignore case is regularly encountered in object detection: when an anchor has
a relatively small overlap with a ground-truth box, one neither wants to
consider this box a positive example (match) nor a negative example (no match).
The Match class is used to store the match results and it provides simple apis
to query the results.
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
from object_detection.utils import ops
class Match(object):
"""Class to store results from the matcher.
This class is used to store the results from the matcher. It provides
convenient methods to query the matching results.
"""
def __init__(self, match_results, use_matmul_gather=False):
"""Constructs a Match object.
Args:
match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
meaning that column i is matched with row match_results[i].
(2) match_results[i]=-1, meaning that column i is not matched.
(3) match_results[i]=-2, meaning that column i is ignored.
use_matmul_gather: Use matrix multiplication based gather instead of
standard tf.gather. (Default: False).
Raises:
ValueError: if match_results does not have rank 1 or is not an
integer int32 scalar tensor
"""
if match_results.shape.ndims != 1:
raise ValueError('match_results should have rank 1')
if match_results.dtype != tf.int32:
raise ValueError('match_results should be an int32 or int64 scalar '
'tensor')
self._match_results = match_results
self._gather_op = tf.gather
if use_matmul_gather:
self._gather_op = ops.matmul_gather_on_zeroth_axis
@property
def match_results(self):
"""The accessor for match results.
Returns:
the tensor which encodes the match results.
"""
return self._match_results
def matched_column_indices(self):
"""Returns column indices that match to some row.
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))
def matched_column_indicator(self):
"""Returns column indices that are matched.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return tf.greater_equal(self._match_results, 0)
def num_matched_columns(self):
"""Returns number (int32 scalar tensor) of matched columns."""
return tf.size(self.matched_column_indices())
def unmatched_column_indices(self):
"""Returns column indices that do not match any row.
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))
def unmatched_column_indicator(self):
"""Returns column indices that are unmatched.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return tf.equal(self._match_results, -1)
def num_unmatched_columns(self):
"""Returns number (int32 scalar tensor) of unmatched columns."""
return tf.size(self.unmatched_column_indices())
def ignored_column_indices(self):
"""Returns column indices that are ignored (neither Matched nor Unmatched).
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
def ignored_column_indicator(self):
"""Returns boolean column indicator where True means the colum is ignored.
Returns:
column_indicator: boolean vector which is True for all ignored column
indices.
"""
return tf.equal(self._match_results, -2)
def num_ignored_columns(self):
"""Returns number (int32 scalar tensor) of matched columns."""
return tf.size(self.ignored_column_indices())
def unmatched_or_ignored_column_indices(self):
"""Returns column indices that are unmatched or ignored.
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
def matched_row_indices(self):
"""Returns row indices that match some column.
The indices returned by this op are ordered so as to be in correspondence
with the output of matched_column_indicator(). For example if
self.matched_column_indicator() is [0,2], and self.matched_row_indices() is
[7, 3], then we know that column 0 was matched to row 7 and column 2 was
matched to row 3.
Returns:
row_indices: int32 tensor of shape [K] with row indices.
"""
return self._reshape_and_cast(
self._gather_op(self._match_results, self.matched_column_indices()))
def _reshape_and_cast(self, t):
return tf.cast(tf.reshape(t, [-1]), tf.int32)
def gather_based_on_match(self, input_tensor, unmatched_value,
ignored_value):
"""Gathers elements from `input_tensor` based on match results.
For columns that are matched to a row, gathered_tensor[col] is set to
input_tensor[match_results[col]]. For columns that are unmatched,
gathered_tensor[col] is set to unmatched_value. Finally, for columns that
are ignored gathered_tensor[col] is set to ignored_value.
Note that the input_tensor.shape[1:] must match with unmatched_value.shape
and ignored_value.shape
Args:
input_tensor: Tensor to gather values from.
unmatched_value: Constant tensor value for unmatched columns.
ignored_value: Constant tensor value for ignored columns.
Returns:
gathered_tensor: A tensor containing values gathered from input_tensor.
The shape of the gathered tensor is [match_results.shape[0]] +
input_tensor.shape[1:].
"""
input_tensor = tf.concat([tf.stack([ignored_value, unmatched_value]),
input_tensor], axis=0)
gather_indices = tf.maximum(self.match_results + 2, 0)
gathered_tensor = self._gather_op(input_tensor, gather_indices)
return gathered_tensor
class Matcher(object):
"""Abstract base class for matcher.
"""
__metaclass__ = ABCMeta
def __init__(self, use_matmul_gather=False):
"""Constructs a Matcher.
Args:
use_matmul_gather: Force constructed match objects to use matrix
multiplication based gather instead of standard tf.gather.
(Default: False).
"""
self._use_matmul_gather = use_matmul_gather
def match(self, similarity_matrix, scope=None, **params):
"""Computes matches among row and column indices and returns the result.
Computes matches among the row and column indices based on the similarity
matrix and optional arguments.
Args:
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
where higher value means more similar.
scope: Op scope name. Defaults to 'Match' if None.
**params: Additional keyword arguments for specific implementations of
the Matcher.
Returns:
A Match object with the results of matching.
"""
with tf.name_scope(scope, 'Match', [similarity_matrix, params]) as scope:
return Match(self._match(similarity_matrix, **params),
self._use_matmul_gather)
@abstractmethod
def _match(self, similarity_matrix, **params):
"""Method to be overridden by implementations.
Args:
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
where higher value means more similar.
**params: Additional keyword arguments for specific implementations of
the Matcher.
Returns:
match_results: Integer tensor of shape [M]: match_results[i]>=0 means
that column i is matched to row match_results[i], match_results[i]=-1
means that the column is not matched. match_results[i]=-2 means that
the column is ignored (usually this happens when there is a very weak
match which one neither wants as positive nor negative example).
"""
pass
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.matcher."""
import numpy as np
import tensorflow as tf
from object_detection.core import matcher
class MatchTest(tf.test.TestCase):
def test_get_correct_matched_columnIndices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indices = [0, 1, 3, 5]
matched_column_indices = match.matched_column_indices()
self.assertEquals(matched_column_indices.dtype, tf.int32)
with self.test_session() as sess:
matched_column_indices = sess.run(matched_column_indices)
self.assertAllEqual(matched_column_indices, expected_column_indices)
def test_get_correct_counts(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
exp_num_matched_columns = 4
exp_num_unmatched_columns = 2
exp_num_ignored_columns = 1
num_matched_columns = match.num_matched_columns()
num_unmatched_columns = match.num_unmatched_columns()
num_ignored_columns = match.num_ignored_columns()
self.assertEquals(num_matched_columns.dtype, tf.int32)
self.assertEquals(num_unmatched_columns.dtype, tf.int32)
self.assertEquals(num_ignored_columns.dtype, tf.int32)
with self.test_session() as sess:
(num_matched_columns_out, num_unmatched_columns_out,
num_ignored_columns_out) = sess.run(
[num_matched_columns, num_unmatched_columns, num_ignored_columns])
self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns)
self.assertAllEqual(num_unmatched_columns_out, exp_num_unmatched_columns)
self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns)
def testGetCorrectUnmatchedColumnIndices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indices = [2, 4]
unmatched_column_indices = match.unmatched_column_indices()
self.assertEquals(unmatched_column_indices.dtype, tf.int32)
with self.test_session() as sess:
unmatched_column_indices = sess.run(unmatched_column_indices)
self.assertAllEqual(unmatched_column_indices, expected_column_indices)
def testGetCorrectMatchedRowIndices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_row_indices = [3, 1, 0, 5]
matched_row_indices = match.matched_row_indices()
self.assertEquals(matched_row_indices.dtype, tf.int32)
with self.test_session() as sess:
matched_row_inds = sess.run(matched_row_indices)
self.assertAllEqual(matched_row_inds, expected_row_indices)
def test_get_correct_ignored_column_indices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indices = [6]
ignored_column_indices = match.ignored_column_indices()
self.assertEquals(ignored_column_indices.dtype, tf.int32)
with self.test_session() as sess:
ignored_column_indices = sess.run(ignored_column_indices)
self.assertAllEqual(ignored_column_indices, expected_column_indices)
def test_get_correct_matched_column_indicator(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indicator = [True, True, False, True, False, True, False]
matched_column_indicator = match.matched_column_indicator()
self.assertEquals(matched_column_indicator.dtype, tf.bool)
with self.test_session() as sess:
matched_column_indicator = sess.run(matched_column_indicator)
self.assertAllEqual(matched_column_indicator, expected_column_indicator)
def test_get_correct_unmatched_column_indicator(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indicator = [False, False, True, False, True, False, False]
unmatched_column_indicator = match.unmatched_column_indicator()
self.assertEquals(unmatched_column_indicator.dtype, tf.bool)
with self.test_session() as sess:
unmatched_column_indicator = sess.run(unmatched_column_indicator)
self.assertAllEqual(unmatched_column_indicator, expected_column_indicator)
def test_get_correct_ignored_column_indicator(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indicator = [False, False, False, False, False, False, True]
ignored_column_indicator = match.ignored_column_indicator()
self.assertEquals(ignored_column_indicator.dtype, tf.bool)
with self.test_session() as sess:
ignored_column_indicator = sess.run(ignored_column_indicator)
self.assertAllEqual(ignored_column_indicator, expected_column_indicator)
def test_get_correct_unmatched_ignored_column_indices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indices = [2, 4, 6]
unmatched_ignored_column_indices = (match.
unmatched_or_ignored_column_indices())
self.assertEquals(unmatched_ignored_column_indices.dtype, tf.int32)
with self.test_session() as sess:
unmatched_ignored_column_indices = sess.run(
unmatched_ignored_column_indices)
self.assertAllEqual(unmatched_ignored_column_indices,
expected_column_indices)
def test_all_columns_accounted_for(self):
# Note: deliberately setting to small number so not always
# all possibilities appear (matched, unmatched, ignored)
num_matches = 10
match_results = tf.random_uniform(
[num_matches], minval=-2, maxval=5, dtype=tf.int32)
match = matcher.Match(match_results)
matched_column_indices = match.matched_column_indices()
unmatched_column_indices = match.unmatched_column_indices()
ignored_column_indices = match.ignored_column_indices()
with self.test_session() as sess:
matched, unmatched, ignored = sess.run([
matched_column_indices, unmatched_column_indices,
ignored_column_indices
])
all_indices = np.hstack((matched, unmatched, ignored))
all_indices_sorted = np.sort(all_indices)
self.assertAllEqual(all_indices_sorted,
np.arange(num_matches, dtype=np.int32))
def test_scalar_gather_based_on_match(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
input_tensor = tf.constant([0, 1, 2, 3, 4, 5, 6, 7], dtype=tf.float32)
expected_gathered_tensor = [3, 1, 100, 0, 100, 5, 200]
match = matcher.Match(match_results)
gathered_tensor = match.gather_based_on_match(input_tensor,
unmatched_value=100.,
ignored_value=200.)
self.assertEquals(gathered_tensor.dtype, tf.float32)
with self.test_session():
gathered_tensor_out = gathered_tensor.eval()
self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
def test_multidimensional_gather_based_on_match(self):
match_results = tf.constant([1, -1, -2])
input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
dtype=tf.float32)
expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
match = matcher.Match(match_results)
gathered_tensor = match.gather_based_on_match(input_tensor,
unmatched_value=tf.zeros(4),
ignored_value=tf.zeros(4))
self.assertEquals(gathered_tensor.dtype, tf.float32)
with self.test_session():
gathered_tensor_out = gathered_tensor.eval()
self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
def test_multidimensional_gather_based_on_match_with_matmul_gather_op(self):
match_results = tf.constant([1, -1, -2])
input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
dtype=tf.float32)
expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
match = matcher.Match(match_results, use_matmul_gather=True)
gathered_tensor = match.gather_based_on_match(input_tensor,
unmatched_value=tf.zeros(4),
ignored_value=tf.zeros(4))
self.assertEquals(gathered_tensor.dtype, tf.float32)
with self.test_session() as sess:
self.assertTrue(
all([op.name is not 'Gather' for op in sess.graph.get_operations()]))
gathered_tensor_out = gathered_tensor.eval()
self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base minibatch sampler module.
The job of the minibatch_sampler is to subsample a minibatch based on some
criterion.
The main function call is:
subsample(indicator, batch_size, **params).
Indicator is a 1d boolean tensor where True denotes which examples can be
sampled. It returns a boolean indicator where True denotes an example has been
sampled..
Subclasses should implement the Subsample function and can make use of the
@staticmethod SubsampleIndicator.
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
from object_detection.utils import ops
class MinibatchSampler(object):
"""Abstract base class for subsampling minibatches."""
__metaclass__ = ABCMeta
def __init__(self):
"""Constructs a minibatch sampler."""
pass
@abstractmethod
def subsample(self, indicator, batch_size, **params):
"""Returns subsample of entries in indicator.
Args:
indicator: boolean tensor of shape [N] whose True entries can be sampled.
batch_size: desired batch size.
**params: additional keyword arguments for specific implementations of
the MinibatchSampler.
Returns:
sample_indicator: boolean tensor of shape [N] whose True entries have been
sampled. If sum(indicator) >= batch_size, sum(is_sampled) = batch_size
"""
pass
@staticmethod
def subsample_indicator(indicator, num_samples):
"""Subsample indicator vector.
Given a boolean indicator vector with M elements set to `True`, the function
assigns all but `num_samples` of these previously `True` elements to
`False`. If `num_samples` is greater than M, the original indicator vector
is returned.
Args:
indicator: a 1-dimensional boolean tensor indicating which elements
are allowed to be sampled and which are not.
num_samples: int32 scalar tensor
Returns:
a boolean tensor with the same shape as input (indicator) tensor
"""
indices = tf.where(indicator)
indices = tf.random_shuffle(indices)
indices = tf.reshape(indices, [-1])
num_samples = tf.minimum(tf.size(indices), num_samples)
selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))
selected_indicator = ops.indices_to_dense_vector(selected_indices,
tf.shape(indicator)[0])
return tf.equal(selected_indicator, 1)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for google3.research.vale.object_detection.minibatch_sampler."""
import numpy as np
import tensorflow as tf
from object_detection.core import minibatch_sampler
class MinibatchSamplerTest(tf.test.TestCase):
def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
np_indicator = [True, False, True, False, True, True, False]
indicator = tf.constant(np_indicator)
samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
indicator, 3)
with self.test_session() as sess:
samples_out = sess.run(samples)
self.assertTrue(np.sum(samples_out), 3)
self.assertAllEqual(samples_out,
np.logical_and(samples_out, np_indicator))
def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
np_indicator = [True, False, True, False, True, True, False]
indicator = tf.placeholder(tf.bool)
feed_dict = {indicator: np_indicator}
samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
indicator, 3)
with self.test_session() as sess:
samples_out = sess.run(samples, feed_dict=feed_dict)
self.assertTrue(np.sum(samples_out), 3)
self.assertAllEqual(samples_out,
np.logical_and(samples_out, np_indicator))
def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
np_indicator = [True, False, True, False, True, True, False]
indicator = tf.constant(np_indicator)
samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
indicator, 5)
with self.test_session() as sess:
samples_out = sess.run(samples)
self.assertTrue(np.sum(samples_out), 4)
self.assertAllEqual(samples_out,
np.logical_and(samples_out, np_indicator))
def test_subsample_indicator_when_num_samples_is_zero(self):
np_indicator = [True, False, True, False, True, True, False]
indicator = tf.constant(np_indicator)
samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator(
indicator, 0)
with self.test_session() as sess:
samples_none_out = sess.run(samples_none)
self.assertAllEqual(
np.zeros_like(samples_none_out, dtype=bool),
samples_none_out)
def test_subsample_indicator_when_indicator_all_false(self):
indicator_empty = tf.zeros([0], dtype=tf.bool)
samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator(
indicator_empty, 4)
with self.test_session() as sess:
samples_empty_out = sess.run(samples_empty)
self.assertEqual(0, samples_empty_out.size)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Abstract detection model.
This file defines a generic base class for detection models. Programs that are
designed to work with arbitrary detection models should only depend on this
class. We intend for the functions in this class to follow tensor-in/tensor-out
design, thus all functions have tensors or lists/dictionaries holding tensors as
inputs and outputs.
Abstractly, detection models predict output tensors given input images
which can be passed to a loss function at training time or passed to a
postprocessing function at eval time. The computation graphs at a high level
consequently look as follows:
Training time:
inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
Evaluation time:
inputs (images tensor) -> preprocess -> predict -> postprocess
-> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
DetectionModels must thus implement four functions (1) preprocess, (2) predict,
(3) postprocess and (4) loss. DetectionModels should make no assumptions about
the input size or aspect ratio --- they are responsible for doing any
resize/reshaping necessary (see docstring for the preprocess function).
Output classes are always integers in the range [0, num_classes). Any mapping
of these integers to semantic labels is to be handled outside of this class.
Images are resized in the `preprocess` method. All of `preprocess`, `predict`,
and `postprocess` should be reentrant.
The `preprocess` method runs `image_resizer_fn` that returns resized_images and
`true_image_shapes`. Since `image_resizer_fn` can pad the images with zeros,
true_image_shapes indicate the slices that contain the image without padding.
This is useful for padding images to be a fixed size for batching.
The `postprocess` method uses the true image shapes to clip predictions that lie
outside of images.
By default, DetectionModels produce bounding box detections; However, we support
a handful of auxiliary annotations associated with each bounding box, namely,
instance masks and keypoints.
"""
from abc import ABCMeta
from abc import abstractmethod
from object_detection.core import standard_fields as fields
class DetectionModel(object):
"""Abstract base class for detection models."""
__metaclass__ = ABCMeta
def __init__(self, num_classes):
"""Constructor.
Args:
num_classes: number of classes. Note that num_classes *does not* include
background categories that might be implicitly predicted in various
implementations.
"""
self._num_classes = num_classes
self._groundtruth_lists = {}
@property
def num_classes(self):
return self._num_classes
def groundtruth_lists(self, field):
"""Access list of groundtruth tensors.
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints}
Returns:
a list of tensors holding groundtruth information (see also
provide_groundtruth function below), with one entry for each image in the
batch.
Raises:
RuntimeError: if the field has not been provided via provide_groundtruth.
"""
if field not in self._groundtruth_lists:
raise RuntimeError('Groundtruth tensor %s has not been provided', field)
return self._groundtruth_lists[field]
def groundtruth_has_field(self, field):
"""Determines whether the groundtruth includes the given field.
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints}
Returns:
True if the groundtruth includes the given field, False otherwise.
"""
return field in self._groundtruth_lists
@abstractmethod
def preprocess(self, inputs):
"""Input preprocessing.
To be overridden by implementations.
This function is responsible for any scaling/shifting of input values that
is necessary prior to running the detector on an input image.
It is also responsible for any resizing, padding that might be necessary
as images are assumed to arrive in arbitrary sizes. While this function
could conceivably be part of the predict method (below), it is often
convenient to keep these separate --- for example, we may want to preprocess
on one device, place onto a queue, and let another device (e.g., the GPU)
handle prediction.
A few important notes about the preprocess function:
+ We assume that this operation does not have any trainable variables nor
does it affect the groundtruth annotations in any way (thus data
augmentation operations such as random cropping should be performed
externally).
+ There is no assumption that the batchsize in this function is the same as
the batch size in the predict function. In fact, we recommend calling the
preprocess function prior to calling any batching operations (which should
happen outside of the model) and thus assuming that batch sizes are equal
to 1 in the preprocess function.
+ There is also no explicit assumption that the output resolutions
must be fixed across inputs --- this is to support "fully convolutional"
settings in which input images can have different shapes/resolutions.
Args:
inputs: a [batch, height_in, width_in, channels] float32 tensor
representing a batch of images with values between 0 and 255.0.
Returns:
preprocessed_inputs: a [batch, height_out, width_out, channels] float32
tensor representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
"""
pass
@abstractmethod
def predict(self, preprocessed_inputs, true_image_shapes):
"""Predict prediction tensors from inputs tensor.
Outputs of this function can be passed to loss or postprocess functions.
Args:
preprocessed_inputs: a [batch, height, width, channels] float32 tensor
representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns:
prediction_dict: a dictionary holding prediction tensors to be
passed to the Loss or Postprocess functions.
"""
pass
@abstractmethod
def postprocess(self, prediction_dict, true_image_shapes, **params):
"""Convert predicted output tensors to final detections.
Outputs adhere to the following conventions:
* Classes are integers in [0, num_classes); background classes are removed
and the first non-background class is mapped to 0. If the model produces
class-agnostic detections, then no output is produced for classes.
* Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
format and normalized relative to the image window.
* `num_detections` is provided for settings where detections are padded to a
fixed number of boxes.
* We do not specifically assume any kind of probabilistic interpretation
of the scores --- the only important thing is their relative ordering.
Thus implementations of the postprocess function are free to output
logits, probabilities, calibrated probabilities, or anything else.
Args:
prediction_dict: a dictionary holding prediction tensors.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
**params: Additional keyword arguments for specific implementations of
DetectionModel.
Returns:
detections: a dictionary containing the following fields
detection_boxes: [batch, max_detections, 4]
detection_scores: [batch, max_detections]
detection_classes: [batch, max_detections]
(If a model is producing class-agnostic detections, this field may be
missing)
instance_masks: [batch, max_detections, image_height, image_width]
(optional)
keypoints: [batch, max_detections, num_keypoints, 2] (optional)
num_detections: [batch]
"""
pass
@abstractmethod
def loss(self, prediction_dict, true_image_shapes):
"""Compute scalar loss tensors with respect to provided groundtruth.
Calling this function requires that groundtruth tensors have been
provided via the provide_groundtruth function.
Args:
prediction_dict: a dictionary holding predicted tensors
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns:
a dictionary mapping strings (loss names) to scalar tensors representing
loss values.
"""
pass
def provide_groundtruth(self,
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_keypoints_list=None,
groundtruth_weights_list=None,
groundtruth_is_crowd_list=None):
"""Provide groundtruth tensors.
Args:
groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape
[num_boxes, 4] containing coordinates of the groundtruth boxes.
Groundtruth boxes are provided in [y_min, x_min, y_max, x_max]
format and assumed to be normalized and clipped
relative to the image window with y_min <= y_max and x_min <= x_max.
groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
tensors of shape [num_boxes, num_classes] containing the class targets
with the 0th index assumed to map to the first non-background class.
groundtruth_masks_list: a list of 3-D tf.float32 tensors of
shape [num_boxes, height_in, width_in] containing instance
masks with values in {0, 1}. If None, no masks are provided.
Mask resolution `height_in`x`width_in` must agree with the resolution
of the input image tensor provided to the `preprocess` function.
groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of
shape [num_boxes, num_keypoints, 2] containing keypoints.
Keypoints are assumed to be provided in normalized coordinates and
missing keypoints should be encoded as NaN.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
groundtruth_is_crowd_list: A list of 1-D tf.bool tensors of shape
[num_boxes] containing is_crowd annotations
"""
self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
self._groundtruth_lists[
fields.BoxListFields.classes] = groundtruth_classes_list
if groundtruth_weights_list:
self._groundtruth_lists[fields.BoxListFields.
weights] = groundtruth_weights_list
if groundtruth_masks_list:
self._groundtruth_lists[
fields.BoxListFields.masks] = groundtruth_masks_list
if groundtruth_keypoints_list:
self._groundtruth_lists[
fields.BoxListFields.keypoints] = groundtruth_keypoints_list
if groundtruth_is_crowd_list:
self._groundtruth_lists[
fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
@abstractmethod
def restore_map(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of variables to load from a foreign checkpoint.
Returns a map of variable names to load from a checkpoint to variables in
the model graph. This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
pass
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Post-processing operations on detected boxes."""
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils
def multiclass_non_max_suppression(boxes,
scores,
score_thresh,
iou_thresh,
max_size_per_class,
max_total_size=0,
clip_window=None,
change_coordinate_frame=False,
masks=None,
boundaries=None,
additional_fields=None,
scope=None):
"""Multi-class version of non maximum suppression.
This op greedily selects a subset of detection bounding boxes, pruning
away boxes that have high IOU (intersection over union) overlap (> thresh)
with already selected boxes. It operates independently for each class for
which scores are provided (via the scores field of the input box_list),
pruning boxes with score less than a provided threshold prior to
applying NMS.
Please note that this operation is performed on *all* classes, therefore any
background classes should be removed prior to calling this function.
Args:
boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
number of classes or 1 depending on whether a separate box is predicted
per class.
scores: A [k, num_classes] float32 tensor containing the scores for each of
the k detections.
score_thresh: scalar threshold for score (low scoring boxes are removed).
iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
with previously selected boxes are removed).
max_size_per_class: maximum number of retained boxes per class.
max_total_size: maximum number of boxes retained over all classes. By
default returns all boxes retained after capping boxes per class.
clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
representing the window to clip and normalize boxes to before performing
non-max suppression.
change_coordinate_frame: Whether to normalize coordinates after clipping
relative to clip_window (this can only be set to True if a clip_window
is provided)
masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
containing box masks. `q` can be either number of classes or 1 depending
on whether a separate mask is predicted per class.
boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
tensor containing box boundaries. `q` can be either number of classes or 1
depending on whether a separate boundary is predicted per class.
additional_fields: (optional) If not None, a dictionary that maps keys to
tensors whose first dimensions are all of size `k`. After non-maximum
suppression, all tensors corresponding to the selected boxes will be
added to resulting BoxList.
scope: name scope.
Returns:
a BoxList holding M boxes with a rank-1 scores field representing
corresponding scores for each box with scores sorted in decreasing order
and a rank-1 classes field representing a class label for each box.
Raises:
ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
a valid scores field.
"""
if not 0 <= iou_thresh <= 1.0:
raise ValueError('iou_thresh must be between 0 and 1')
if scores.shape.ndims != 2:
raise ValueError('scores field must be of rank 2')
if scores.shape[1].value is None:
raise ValueError('scores must have statically defined second '
'dimension')
if boxes.shape.ndims != 3:
raise ValueError('boxes must be of rank 3.')
if not (boxes.shape[1].value == scores.shape[1].value or
boxes.shape[1].value == 1):
raise ValueError('second dimension of boxes must be either 1 or equal '
'to the second dimension of scores')
if boxes.shape[2].value != 4:
raise ValueError('last dimension of boxes must be of size 4.')
if change_coordinate_frame and clip_window is None:
raise ValueError('if change_coordinate_frame is True, then a clip_window'
'must be specified.')
with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
num_boxes = tf.shape(boxes)[0]
num_scores = tf.shape(scores)[0]
num_classes = scores.get_shape()[1]
length_assert = tf.Assert(
tf.equal(num_boxes, num_scores),
['Incorrect scores field length: actual vs expected.',
num_scores, num_boxes])
selected_boxes_list = []
per_class_boxes_list = tf.unstack(boxes, axis=1)
if masks is not None:
per_class_masks_list = tf.unstack(masks, axis=1)
if boundaries is not None:
per_class_boundaries_list = tf.unstack(boundaries, axis=1)
boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
else [0] * num_classes.value)
for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
per_class_boxes = per_class_boxes_list[boxes_idx]
boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
with tf.control_dependencies([length_assert]):
class_scores = tf.reshape(
tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])
boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
class_scores)
if masks is not None:
per_class_masks = per_class_masks_list[boxes_idx]
boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
per_class_masks)
if boundaries is not None:
per_class_boundaries = per_class_boundaries_list[boxes_idx]
boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries,
per_class_boundaries)
if additional_fields is not None:
for key, tensor in additional_fields.items():
boxlist_and_class_scores.add_field(key, tensor)
boxlist_filtered = box_list_ops.filter_greater_than(
boxlist_and_class_scores, score_thresh)
if clip_window is not None:
boxlist_filtered = box_list_ops.clip_to_window(
boxlist_filtered, clip_window)
if change_coordinate_frame:
boxlist_filtered = box_list_ops.change_coordinate_frame(
boxlist_filtered, clip_window)
max_selection_size = tf.minimum(max_size_per_class,
boxlist_filtered.num_boxes())
selected_indices = tf.image.non_max_suppression(
boxlist_filtered.get(),
boxlist_filtered.get_field(fields.BoxListFields.scores),
max_selection_size,
iou_threshold=iou_thresh)
nms_result = box_list_ops.gather(boxlist_filtered, selected_indices)
nms_result.add_field(
fields.BoxListFields.classes, (tf.zeros_like(
nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
selected_boxes_list.append(nms_result)
selected_boxes = box_list_ops.concatenate(selected_boxes_list)
sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
fields.BoxListFields.scores)
if max_total_size:
max_total_size = tf.minimum(max_total_size,
sorted_boxes.num_boxes())
sorted_boxes = box_list_ops.gather(sorted_boxes,
tf.range(max_total_size))
return sorted_boxes
def batch_multiclass_non_max_suppression(boxes,
scores,
score_thresh,
iou_thresh,
max_size_per_class,
max_total_size=0,
clip_window=None,
change_coordinate_frame=False,
num_valid_boxes=None,
masks=None,
additional_fields=None,
scope=None,
parallel_iterations=32):
"""Multi-class version of non maximum suppression that operates on a batch.
This op is similar to `multiclass_non_max_suppression` but operates on a batch
of boxes and scores. See documentation for `multiclass_non_max_suppression`
for details.
Args:
boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
detections. If `q` is 1 then same boxes are used for all classes
otherwise, if `q` is equal to number of classes, class-specific boxes
are used.
scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
the scores for each of the `num_anchors` detections.
score_thresh: scalar threshold for score (low scoring boxes are removed).
iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
with previously selected boxes are removed).
max_size_per_class: maximum number of retained boxes per class.
max_total_size: maximum number of boxes retained over all classes. By
default returns all boxes retained after capping boxes per class.
clip_window: A float32 tensor of shape [batch_size, 4] where each entry is
of the form [y_min, x_min, y_max, x_max] representing the window to clip
boxes to before performing non-max suppression. This argument can also be
a tensor of shape [4] in which case, the same clip window is applied to
all images in the batch. If clip_widow is None, all boxes are used to
perform non-max suppression.
change_coordinate_frame: Whether to normalize coordinates after clipping
relative to clip_window (this can only be set to True if a clip_window
is provided)
num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
[batch_size] representing the number of valid boxes to be considered
for each image in the batch. This parameter allows for ignoring zero
paddings.
masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
float32 tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class.
additional_fields: (optional) If not None, a dictionary that maps keys to
tensors whose dimensions are [batch_size, num_anchors, ...].
scope: tf scope name.
parallel_iterations: (optional) number of batch items to process in
parallel.
Returns:
'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
containing the non-max suppressed boxes.
'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
the scores for the boxes.
'nmsed_classes': A [batch_size, max_detections] float32 tensor
containing the class for boxes.
'nmsed_masks': (optional) a
[batch_size, max_detections, mask_height, mask_width] float32 tensor
containing masks for each selected box. This is set to None if input
`masks` is None.
'nmsed_additional_fields': (optional) a dictionary of
[batch_size, max_detections, ...] float32 tensors corresponding to the
tensors specified in the input `additional_fields`. This is not returned
if input `additional_fields` is None.
'num_detections': A [batch_size] int32 tensor indicating the number of
valid detections per batch item. Only the top num_detections[i] entries in
nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
entries are zero paddings.
Raises:
ValueError: if `q` in boxes.shape is not 1 or not equal to number of
classes as inferred from scores.shape.
"""
q = boxes.shape[2].value
num_classes = scores.shape[2].value
if q != 1 and q != num_classes:
raise ValueError('third dimension of boxes must be either 1 or equal '
'to the third dimension of scores')
if change_coordinate_frame and clip_window is None:
raise ValueError('if change_coordinate_frame is True, then a clip_window'
'must be specified.')
original_masks = masks
original_additional_fields = additional_fields
with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
boxes_shape = boxes.shape
batch_size = boxes_shape[0].value
num_anchors = boxes_shape[1].value
if batch_size is None:
batch_size = tf.shape(boxes)[0]
if num_anchors is None:
num_anchors = tf.shape(boxes)[1]
# If num valid boxes aren't provided, create one and mark all boxes as
# valid.
if num_valid_boxes is None:
num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors
# If masks aren't provided, create dummy masks so we can only have one copy
# of _single_image_nms_fn and discard the dummy masks after map_fn.
if masks is None:
masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
masks = tf.zeros(masks_shape)
if clip_window is None:
clip_window = tf.stack([
tf.reduce_min(boxes[:, :, :, 0]),
tf.reduce_min(boxes[:, :, :, 1]),
tf.reduce_max(boxes[:, :, :, 2]),
tf.reduce_max(boxes[:, :, :, 3])
])
if clip_window.shape.ndims == 1:
clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1])
if additional_fields is None:
additional_fields = {}
def _single_image_nms_fn(args):
"""Runs NMS on a single image and returns padded output.
Args:
args: A list of tensors consisting of the following:
per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
detections. If `q` is 1 then same boxes are used for all classes
otherwise, if `q` is equal to number of classes, class-specific
boxes are used.
per_image_scores - A [num_anchors, num_classes] float32 tensor
containing the scores for each of the `num_anchors` detections.
per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class.
per_image_clip_window - A 1D float32 tensor of the form
[ymin, xmin, ymax, xmax] representing the window to clip the boxes
to.
per_image_additional_fields - (optional) A variable number of float32
tensors each with size [num_anchors, ...].
per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
shape [batch_size] representing the number of valid boxes to be
considered for each image in the batch. This parameter allows for
ignoring zero paddings.
Returns:
'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
non-max suppressed boxes.
'nmsed_scores': A [max_detections] float32 tensor containing the scores
for the boxes.
'nmsed_classes': A [max_detections] float32 tensor containing the class
for boxes.
'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
float32 tensor containing masks for each selected box. This is set to
None if input `masks` is None.
'nmsed_additional_fields': (optional) A variable number of float32
tensors each with size [max_detections, ...] corresponding to the
input `per_image_additional_fields`.
'num_detections': A [batch_size] int32 tensor indicating the number of
valid detections per batch item. Only the top num_detections[i]
entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The
rest of the entries are zero paddings.
"""
per_image_boxes = args[0]
per_image_scores = args[1]
per_image_masks = args[2]
per_image_clip_window = args[3]
per_image_additional_fields = {
key: value
for key, value in zip(additional_fields, args[4:-1])
}
per_image_num_valid_boxes = args[-1]
per_image_boxes = tf.reshape(
tf.slice(per_image_boxes, 3 * [0],
tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4])
per_image_scores = tf.reshape(
tf.slice(per_image_scores, [0, 0],
tf.stack([per_image_num_valid_boxes, -1])),
[-1, num_classes])
per_image_masks = tf.reshape(
tf.slice(per_image_masks, 4 * [0],
tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
[-1, q, per_image_masks.shape[2].value,
per_image_masks.shape[3].value])
if per_image_additional_fields is not None:
for key, tensor in per_image_additional_fields.items():
additional_field_shape = tensor.get_shape()
additional_field_dim = len(additional_field_shape)
per_image_additional_fields[key] = tf.reshape(
tf.slice(per_image_additional_fields[key],
additional_field_dim * [0],
tf.stack([per_image_num_valid_boxes] +
(additional_field_dim - 1) * [-1])),
[-1] + [dim.value for dim in additional_field_shape[1:]])
nmsed_boxlist = multiclass_non_max_suppression(
per_image_boxes,
per_image_scores,
score_thresh,
iou_thresh,
max_size_per_class,
max_total_size,
clip_window=per_image_clip_window,
change_coordinate_frame=change_coordinate_frame,
masks=per_image_masks,
additional_fields=per_image_additional_fields)
padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
max_total_size)
num_detections = nmsed_boxlist.num_boxes()
nmsed_boxes = padded_boxlist.get()
nmsed_scores = padded_boxlist.get_field(fields.BoxListFields.scores)
nmsed_classes = padded_boxlist.get_field(fields.BoxListFields.classes)
nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks)
nmsed_additional_fields = [
padded_boxlist.get_field(key) for key in per_image_additional_fields
]
return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
nmsed_additional_fields + [num_detections])
num_additional_fields = 0
if additional_fields is not None:
num_additional_fields = len(additional_fields)
num_nmsed_outputs = 4 + num_additional_fields
batch_outputs = shape_utils.static_or_dynamic_map_fn(
_single_image_nms_fn,
elems=([boxes, scores, masks, clip_window] +
list(additional_fields.values()) + [num_valid_boxes]),
dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
parallel_iterations=parallel_iterations)
batch_nmsed_boxes = batch_outputs[0]
batch_nmsed_scores = batch_outputs[1]
batch_nmsed_classes = batch_outputs[2]
batch_nmsed_masks = batch_outputs[3]
batch_nmsed_additional_fields = {
key: value
for key, value in zip(additional_fields, batch_outputs[4:-1])
}
batch_num_detections = batch_outputs[-1]
if original_masks is None:
batch_nmsed_masks = None
if original_additional_fields is None:
batch_nmsed_additional_fields = None
return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
batch_nmsed_masks, batch_nmsed_additional_fields,
batch_num_detections)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment