Commit a4944a57 authored by derekjchow, committed by Sergio Guadarrama

Add Tensorflow Object Detection API. (#1561)

For details see our paper:
"Speed/accuracy trade-offs for modern convolutional object detectors."
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I,
Wojna Z, Song Y, Guadarrama S, Murphy K, CVPR 2017
https://arxiv.org/abs/1611.10012
parent 60c3ed2e
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Numpy BoxList classes and functions."""
import numpy as np
class BoxList(object):
"""Box collection.
BoxList represents a list of bounding boxes as a numpy array, where each
bounding box is represented as a row of 4 numbers,
[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a
given list correspond to a single image.
Optionally, users can add additional related fields (such as
objectness/classification scores).
"""
def __init__(self, data):
"""Constructs box collection.
Args:
data: a numpy array of shape [N, 4] representing box coordinates
Raises:
ValueError: if bbox data is not a numpy array
ValueError: if invalid dimensions for bbox data
"""
if not isinstance(data, np.ndarray):
raise ValueError('data must be a numpy array.')
if len(data.shape) != 2 or data.shape[1] != 4:
raise ValueError('Invalid dimensions for box data.')
if data.dtype != np.float32 and data.dtype != np.float64:
raise ValueError('Invalid data type for box data: float is required.')
if not self._is_valid_boxes(data):
raise ValueError('Invalid box data. data must be a numpy array of '
'N*[y_min, x_min, y_max, x_max]')
self.data = {'boxes': data}
def num_boxes(self):
"""Return number of boxes held in collections."""
return self.data['boxes'].shape[0]
def get_extra_fields(self):
"""Return all non-box fields."""
return [k for k in self.data.keys() if k != 'boxes']
def has_field(self, field):
return field in self.data
def add_field(self, field, field_data):
"""Add data to a specified field.
Args:
field: a string parameter used to specify a related field to be accessed.
field_data: a numpy array of [N, ...] representing the data associated
with the field.
Raises:
ValueError: if the field already exists or the dimension of the field
data does not match the number of boxes.
"""
if self.has_field(field):
raise ValueError('Field ' + field + ' already exists')
if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes():
raise ValueError('Invalid dimensions for field data')
self.data[field] = field_data
def get(self):
"""Convenience function for accesssing box coordinates.
Returns:
a numpy array of shape [N, 4] representing box corners
"""
return self.get_field('boxes')
def get_field(self, field):
"""Accesses data associated with the specified field in the box collection.
Args:
field: a string parameter used to specify a related field to be accessed.
Returns:
a numpy array of shape [N, ...] representing the data of the associated field
Raises:
ValueError: if invalid field
"""
if not self.has_field(field):
raise ValueError('field {} does not exist'.format(field))
return self.data[field]
def get_coordinates(self):
"""Get corner coordinates of boxes.
Returns:
a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max]
"""
box_coordinates = self.get()
y_min = box_coordinates[:, 0]
x_min = box_coordinates[:, 1]
y_max = box_coordinates[:, 2]
x_max = box_coordinates[:, 3]
return [y_min, x_min, y_max, x_max]
def _is_valid_boxes(self, data):
"""Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin].
Args:
data: a numpy array of shape [N, 4] representing box coordinates
Returns:
a boolean indicating whether all ymax of boxes are equal to or greater
than ymin, and all xmax of boxes are equal to or greater than xmin.
"""
if data.shape[0] > 0:
for i in range(data.shape[0]):
if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]:
return False
return True
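# Example (an illustrative sketch, not part of the original file): building a
# BoxList and attaching a per-box field. The coordinate and score values are
# made up for demonstration.
example_boxes = np.array([[0.1, 0.1, 0.5, 0.5],
                          [0.2, 0.3, 0.9, 1.0]], dtype=np.float32)
example_boxlist = BoxList(example_boxes)
example_boxlist.add_field('scores', np.array([0.9, 0.4], dtype=np.float32))
print(example_boxlist.num_boxes())         # 2
print(example_boxlist.get_extra_fields())  # ['scores']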
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List operations for Numpy BoxLists.
Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
from object_detection.utils import np_box_list
from object_detection.utils import np_box_ops
class SortOrder(object):
"""Enum class for sort order.
Attributes:
ASCEND: ascending order.
DESCEND: descending order.
"""
ASCEND = 1
DESCEND = 2
def area(boxlist):
"""Computes area of boxes.
Args:
boxlist: BoxList holding N boxes
Returns:
a numpy array with shape [N] representing box areas
"""
y_min, x_min, y_max, x_max = boxlist.get_coordinates()
return (y_max - y_min) * (x_max - x_min)
def intersection(boxlist1, boxlist2):
"""Compute pairwise intersection areas between boxes.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
Returns:
a numpy array with shape [N, M] representing pairwise intersection areas
"""
return np_box_ops.intersection(boxlist1.get(), boxlist2.get())
def iou(boxlist1, boxlist2):
"""Computes pairwise intersection-over-union between box collections.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
Returns:
a numpy array with shape [N, M] representing pairwise iou scores.
"""
return np_box_ops.iou(boxlist1.get(), boxlist2.get())
def ioa(boxlist1, boxlist2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
their intersection area over box2's area. Note that ioa is not symmetric,
that is, IOA(box1, box2) != IOA(box2, box1).
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
return np_box_ops.ioa(boxlist1.get(), boxlist2.get())
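# Example (an illustrative sketch, not part of the original file) of the IOA
# asymmetry: a unit box and its lower half intersect with area 0.5, so the
# score depends on whose area is the denominator.
example_a = np_box_list.BoxList(np.array([[0., 0., 1., 1.]], dtype=np.float32))
example_b = np_box_list.BoxList(np.array([[0., 0., 0.5, 1.]], dtype=np.float32))
print(ioa(example_a, example_b))  # [[1.0]]: intersection 0.5 / area(b) 0.5
print(ioa(example_b, example_a))  # [[0.5]]: intersection 0.5 / area(a) 1.0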
def gather(boxlist, indices, fields=None):
"""Gather boxes from BoxList according to indices and return new BoxList.
By default, Gather returns boxes corresponding to the input index list, as
well as all additional fields stored in the boxlist (indexing into the
first dimension). However one can optionally only gather from a
subset of fields.
Args:
boxlist: BoxList holding N boxes
indices: a 1-d numpy array of type int_
fields: (optional) list of fields to also gather from. If None (default),
all fields are gathered from. Pass an empty fields list to only gather
the box coordinates.
Returns:
subboxlist: a BoxList corresponding to the subset of the input BoxList
specified by indices
Raises:
ValueError: if specified field is not contained in boxlist or if the
indices are not of type int_
"""
if indices.size:
if np.amax(indices) >= boxlist.num_boxes() or np.amin(indices) < 0:
raise ValueError('indices are out of valid range.')
subboxlist = np_box_list.BoxList(boxlist.get()[indices, :])
if fields is None:
fields = boxlist.get_extra_fields()
for field in fields:
extra_field_data = boxlist.get_field(field)
subboxlist.add_field(field, extra_field_data[indices, ...])
return subboxlist
def sort_by_field(boxlist, field, order=SortOrder.DESCEND):
"""Sort boxes and associated fields according to a scalar field.
A common use case is reordering the boxes according to descending scores.
Args:
boxlist: BoxList holding N boxes.
field: A BoxList field for sorting and reordering the BoxList.
order: (Optional) SortOrder.DESCEND or SortOrder.ASCEND. Default is
SortOrder.DESCEND.
Returns:
sorted_boxlist: A sorted BoxList with the field in the specified order.
Raises:
ValueError: if specified field does not exist or is not of single dimension.
ValueError: if the order is not either descend or ascend.
"""
if not boxlist.has_field(field):
raise ValueError('Field ' + field + ' does not exist')
if len(boxlist.get_field(field).shape) != 1:
raise ValueError('Field ' + field + ' should be one-dimensional.')
if order != SortOrder.DESCEND and order != SortOrder.ASCEND:
raise ValueError('Invalid sort order')
field_to_sort = boxlist.get_field(field)
sorted_indices = np.argsort(field_to_sort)
if order == SortOrder.DESCEND:
sorted_indices = sorted_indices[::-1]
return gather(boxlist, sorted_indices)
def non_max_suppression(boxlist,
max_output_size=10000,
iou_threshold=1.0,
score_threshold=-10.0):
"""Non maximum suppression.
This op greedily selects a subset of detection bounding boxes, pruning
away boxes that have high IOU (intersection over union) overlap (> thresh)
with already selected boxes. In each iteration, the detected bounding box
with the highest score in the available pool is selected.
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
representing detection scores. All scores belong to the same class.
max_output_size: maximum number of retained boxes
iou_threshold: intersection over union threshold.
score_threshold: minimum score threshold. Boxes with scores below this
value are removed. Defaults to -10, a threshold low enough to pass
virtually all boxes unless the user sets a different value.
Returns:
a BoxList holding M boxes where M <= max_output_size
Raises:
ValueError: if 'scores' field does not exist
ValueError: if threshold is not in [0, 1]
ValueError: if max_output_size < 0
"""
if not boxlist.has_field('scores'):
raise ValueError('Field scores does not exist')
if iou_threshold < 0. or iou_threshold > 1.0:
raise ValueError('IOU threshold must be in [0, 1]')
if max_output_size < 0:
raise ValueError('max_output_size must be non-negative.')
boxlist = filter_scores_greater_than(boxlist, score_threshold)
if boxlist.num_boxes() == 0:
return boxlist
boxlist = sort_by_field(boxlist, 'scores')
# Prevent further computation if NMS is disabled.
if iou_threshold == 1.0:
if boxlist.num_boxes() > max_output_size:
selected_indices = np.arange(max_output_size)
return gather(boxlist, selected_indices)
else:
return boxlist
boxes = boxlist.get()
num_boxes = boxlist.num_boxes()
# is_index_valid is True only for boxes that remain valid NMS candidates.
is_index_valid = np.full(num_boxes, 1, dtype=bool)
selected_indices = []
num_output = 0
for i in range(num_boxes):
if num_output < max_output_size:
if is_index_valid[i]:
num_output += 1
selected_indices.append(i)
is_index_valid[i] = False
valid_indices = np.where(is_index_valid)[0]
if valid_indices.size == 0:
break
intersect_over_union = np_box_ops.iou(
np.expand_dims(boxes[i, :], axis=0), boxes[valid_indices, :])
intersect_over_union = np.squeeze(intersect_over_union, axis=0)
is_index_valid[valid_indices] = np.logical_and(
is_index_valid[valid_indices],
intersect_over_union <= iou_threshold)
return gather(boxlist, np.array(selected_indices))
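# Example (an illustrative sketch, not part of the original file): two heavily
# overlapping boxes and one distant box; with iou_threshold=0.5 the
# lower-scoring member of the overlapping pair is suppressed.
example_nms_boxlist = np_box_list.BoxList(
    np.array([[0., 0., 1., 1.],
              [0., 0.1, 1., 1.1],
              [0., 10., 1., 11.]], dtype=np.float32))
example_nms_boxlist.add_field(
    'scores', np.array([0.9, 0.8, 0.7], dtype=np.float32))
example_kept = non_max_suppression(
    example_nms_boxlist, max_output_size=10, iou_threshold=0.5)
print(example_kept.get())  # the 0.9 and 0.7 boxes; the 0.8 box is suppressed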
def multi_class_non_max_suppression(boxlist, score_thresh, iou_thresh,
max_output_size):
"""Multi-class version of non maximum suppression.
This op greedily selects a subset of detection bounding boxes, pruning
away boxes that have high IOU (intersection over union) overlap (> thresh)
with already selected boxes. It operates independently for each class for
which scores are provided (via the scores field of the input box_list),
pruning boxes with score less than a provided threshold prior to
applying NMS.
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
representing detection scores. This scores field is a tensor that can
be 1-dimensional (in the case of a single class) or 2-dimensional, in which
case we assume that it takes the shape [num_boxes, num_classes].
We further assume that this rank is known statically and that
scores.shape[1] is also known (i.e., the number of classes is fixed
and known at graph construction time).
score_thresh: scalar threshold for score (low scoring boxes are removed).
iou_thresh: scalar threshold for IOU (boxes that have high IOU overlap
with previously selected boxes are removed).
max_output_size: maximum number of retained boxes per class.
Returns:
a BoxList holding M boxes with a rank-1 scores field representing
corresponding scores for each box with scores sorted in decreasing order
and a rank-1 classes field representing a class label for each box.
Raises:
ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
a valid scores field.
"""
if not 0 <= iou_thresh <= 1.0:
raise ValueError('thresh must be between 0 and 1')
if not isinstance(boxlist, np_box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field('scores'):
raise ValueError('input boxlist must have \'scores\' field')
scores = boxlist.get_field('scores')
if len(scores.shape) == 1:
scores = np.reshape(scores, [-1, 1])
elif len(scores.shape) == 2:
if scores.shape[1] is None:
raise ValueError('scores field must have statically defined second '
'dimension')
else:
raise ValueError('scores field must be of rank 1 or 2')
num_boxes = boxlist.num_boxes()
num_scores = scores.shape[0]
num_classes = scores.shape[1]
if num_boxes != num_scores:
raise ValueError('Incorrect scores field length: actual (%d) vs '
                 'expected (%d).' % (num_scores, num_boxes))
selected_boxes_list = []
for class_idx in range(num_classes):
boxlist_and_class_scores = np_box_list.BoxList(boxlist.get())
class_scores = np.reshape(scores[0:num_scores, class_idx], [-1])
boxlist_and_class_scores.add_field('scores', class_scores)
boxlist_filt = filter_scores_greater_than(boxlist_and_class_scores,
score_thresh)
nms_result = non_max_suppression(boxlist_filt,
max_output_size=max_output_size,
iou_threshold=iou_thresh,
score_threshold=score_thresh)
nms_result.add_field(
'classes', np.zeros_like(nms_result.get_field('scores')) + class_idx)
selected_boxes_list.append(nms_result)
selected_boxes = concatenate(selected_boxes_list)
sorted_boxes = sort_by_field(selected_boxes, 'scores')
return sorted_boxes
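# Example (an illustrative sketch, not part of the original file): a
# [num_boxes, num_classes] scores matrix; each class column is thresholded
# and suppressed independently, then the survivors are merged and re-sorted.
example_mc_boxlist = np_box_list.BoxList(
    np.array([[0., 0., 1., 1.],
              [0., 0.1, 1., 1.1]], dtype=np.float32))
example_mc_boxlist.add_field('scores',
                             np.array([[0.9, 0.1],
                                       [0.2, 0.8]], dtype=np.float32))
example_mc_result = multi_class_non_max_suppression(
    example_mc_boxlist, score_thresh=0.5, iou_thresh=0.5, max_output_size=5)
print(example_mc_result.get_field('classes'))  # [0. 1.]: one box per class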
def scale(boxlist, y_scale, x_scale):
"""Scale box coordinates in x and y dimensions.
Args:
boxlist: BoxList holding N boxes
y_scale: float
x_scale: float
Returns:
boxlist: BoxList holding N boxes
"""
y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
y_min = y_scale * y_min
y_max = y_scale * y_max
x_min = x_scale * x_min
x_max = x_scale * x_max
scaled_boxlist = np_box_list.BoxList(np.hstack([y_min, x_min, y_max, x_max]))
fields = boxlist.get_extra_fields()
for field in fields:
extra_field_data = boxlist.get_field(field)
scaled_boxlist.add_field(field, extra_field_data)
return scaled_boxlist
def clip_to_window(boxlist, window):
"""Clip bounding boxes to a window.
This op clips input bounding boxes (represented by bounding box
corners) to a window, optionally filtering out boxes that do not
overlap at all with the window.
Args:
boxlist: BoxList holding M_in boxes
window: a numpy array of shape [4] representing the
[y_min, x_min, y_max, x_max] window to which the op
should clip boxes.
Returns:
a BoxList holding M_out boxes where M_out <= M_in
"""
y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
win_y_min = window[0]
win_x_min = window[1]
win_y_max = window[2]
win_x_max = window[3]
y_min_clipped = np.fmax(np.fmin(y_min, win_y_max), win_y_min)
y_max_clipped = np.fmax(np.fmin(y_max, win_y_max), win_y_min)
x_min_clipped = np.fmax(np.fmin(x_min, win_x_max), win_x_min)
x_max_clipped = np.fmax(np.fmin(x_max, win_x_max), win_x_min)
clipped = np_box_list.BoxList(
np.hstack([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped]))
clipped = _copy_extra_fields(clipped, boxlist)
areas = area(clipped)
nonzero_area_indices = np.reshape(np.nonzero(np.greater(areas, 0.0)),
[-1]).astype(np.int32)
return gather(clipped, nonzero_area_indices)
def prune_non_overlapping_boxes(boxlist1, boxlist2, minoverlap=0.0):
"""Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
For each box in boxlist1, we want its IOA to be more than minoverlap with
at least one of the boxes in boxlist2. If it does not, we remove it.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
minoverlap: Minimum required overlap between boxes, to count them as
overlapping.
Returns:
A pruned boxlist with size [N', 4].
"""
intersection_over_area = ioa(boxlist2, boxlist1) # [M, N] tensor
intersection_over_area = np.amax(intersection_over_area, axis=0) # [N] tensor
keep_bool = np.greater_equal(intersection_over_area, np.array(minoverlap))
keep_inds = np.nonzero(keep_bool)[0]
new_boxlist1 = gather(boxlist1, keep_inds)
return new_boxlist1
def prune_outside_window(boxlist, window):
"""Prunes bounding boxes that fall outside a given window.
This function prunes bounding boxes that even partially fall outside the given
window. See also ClipToWindow which only prunes bounding boxes that fall
completely outside the window, and clips any bounding boxes that partially
overflow.
Args:
boxlist: a BoxList holding M_in boxes.
window: a numpy array of size 4, representing [ymin, xmin, ymax, xmax]
of the window.
Returns:
pruned_boxlist: a BoxList holding M_out boxes where M_out <= M_in.
valid_indices: a numpy array with shape [M_out] indexing the valid bounding
boxes in the input BoxList.
"""
y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
win_y_min = window[0]
win_x_min = window[1]
win_y_max = window[2]
win_x_max = window[3]
coordinate_violations = np.hstack([np.less(y_min, win_y_min),
np.less(x_min, win_x_min),
np.greater(y_max, win_y_max),
np.greater(x_max, win_x_max)])
valid_indices = np.reshape(
np.where(np.logical_not(np.max(coordinate_violations, axis=1))), [-1])
return gather(boxlist, valid_indices), valid_indices
def concatenate(boxlists, fields=None):
"""Concatenate list of BoxLists.
This op concatenates a list of input BoxLists into a larger BoxList. It also
handles concatenation of BoxList fields as long as the field tensor shapes
are equal except for the first dimension.
Args:
boxlists: list of BoxList objects
fields: optional list of fields to also concatenate. By default, all
fields from the first BoxList in the list are included in the
concatenation.
Returns:
a BoxList with number of boxes equal to
sum([boxlist.num_boxes() for boxlist in boxlists])
Raises:
ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
contains non BoxList objects), or if requested fields are not contained in
all boxlists
"""
if not isinstance(boxlists, list):
raise ValueError('boxlists should be a list')
if not boxlists:
raise ValueError('boxlists should have nonzero length')
for boxlist in boxlists:
if not isinstance(boxlist, np_box_list.BoxList):
raise ValueError('all elements of boxlists should be BoxList objects')
concatenated = np_box_list.BoxList(
np.vstack([boxlist.get() for boxlist in boxlists]))
if fields is None:
fields = boxlists[0].get_extra_fields()
for field in fields:
first_field_shape = boxlists[0].get_field(field).shape
first_field_shape = first_field_shape[1:]
for boxlist in boxlists:
if not boxlist.has_field(field):
raise ValueError('boxlist must contain all requested fields')
field_shape = boxlist.get_field(field).shape
field_shape = field_shape[1:]
if field_shape != first_field_shape:
raise ValueError('field %s must have same shape for all boxlists '
'except for the 0th dimension.' % field)
concatenated_field = np.concatenate(
[boxlist.get_field(field) for boxlist in boxlists], axis=0)
concatenated.add_field(field, concatenated_field)
return concatenated
def filter_scores_greater_than(boxlist, thresh):
"""Filter to keep only boxes with score exceeding a given threshold.
This op keeps the collection of boxes whose corresponding scores are
greater than the input threshold.
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
representing detection scores.
thresh: scalar threshold
Returns:
a BoxList holding M boxes where M <= N
Raises:
ValueError: if boxlist not a BoxList object or if it does not
have a scores field
"""
if not isinstance(boxlist, np_box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field('scores'):
raise ValueError('input boxlist must have \'scores\' field')
scores = boxlist.get_field('scores')
if len(scores.shape) > 2:
raise ValueError('Scores should have rank 1 or 2')
if len(scores.shape) == 2 and scores.shape[1] != 1:
raise ValueError('Scores should have rank 1 or have shape '
'consistent with [None, 1]')
high_score_indices = np.reshape(np.where(np.greater(scores, thresh)),
[-1]).astype(np.int32)
return gather(boxlist, high_score_indices)
def change_coordinate_frame(boxlist, window):
"""Change coordinate frame of the boxlist to be relative to window's frame.
Given a window of the form [ymin, xmin, ymax, xmax],
changes bounding box coordinates from boxlist to be relative to this window
(e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).
An example use case is data augmentation: where we are given groundtruth
boxes (boxlist) and would like to randomly crop the image to some
window (window). In this case we need to change the coordinate frame of
each groundtruth box to be relative to this new window.
Args:
boxlist: A BoxList object holding N boxes.
window: a size 4 1-D numpy array.
Returns:
a BoxList object with N boxes.
"""
win_height = window[2] - window[0]
win_width = window[3] - window[1]
boxlist_new = scale(
np_box_list.BoxList(boxlist.get() -
[window[0], window[1], window[0], window[1]]),
1.0 / win_height, 1.0 / win_width)
_copy_extra_fields(boxlist_new, boxlist)
return boxlist_new
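# Example (an illustrative sketch, not part of the original file): a box
# re-expressed relative to a centered crop window; the window's min corner
# maps to (0, 0) and its max corner to (1, 1).
example_gt = np_box_list.BoxList(
    np.array([[0.25, 0.25, 0.5, 0.5]], dtype=np.float32))
example_window = np.array([0.25, 0.25, 0.75, 0.75], dtype=np.float32)
print(change_coordinate_frame(example_gt, example_window).get())
# [[0. 0. 0.5 0.5]]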
def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
"""Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
Args:
boxlist_to_copy_to: BoxList to which extra fields are copied.
boxlist_to_copy_from: BoxList from which fields are copied.
Returns:
boxlist_to_copy_to with extra fields.
"""
for field in boxlist_to_copy_from.get_extra_fields():
boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
return boxlist_to_copy_to
def _update_valid_indices_by_removing_high_iou_boxes(
selected_indices, is_index_valid, intersect_over_union, threshold):
  """Invalidates indices whose max IOU with any selected box exceeds threshold."""
max_iou = np.max(intersect_over_union[:, selected_indices], axis=1)
return np.logical_and(is_index_valid, max_iou <= threshold)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_list_ops."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_box_list
from object_detection.utils import np_box_list_ops
class AreaRelatedTest(tf.test.TestCase):
def setUp(self):
boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
dtype=float)
boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.boxlist1 = np_box_list.BoxList(boxes1)
self.boxlist2 = np_box_list.BoxList(boxes2)
def test_area(self):
areas = np_box_list_ops.area(self.boxlist1)
expected_areas = np.array([6.0, 5.0], dtype=float)
self.assertAllClose(expected_areas, areas)
def test_intersection(self):
intersection = np_box_list_ops.intersection(self.boxlist1, self.boxlist2)
expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]],
dtype=float)
self.assertAllClose(intersection, expected_intersection)
def test_iou(self):
iou = np_box_list_ops.iou(self.boxlist1, self.boxlist2)
expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0],
[1.0 / 16.0, 0.0, 5.0 / 400.0]],
dtype=float)
self.assertAllClose(iou, expected_iou)
def test_ioa(self):
boxlist1 = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
boxlist2 = np_box_list.BoxList(
np.array(
[[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]], dtype=np.float32))
ioa21 = np_box_list_ops.ioa(boxlist2, boxlist1)
expected_ioa21 = np.array([[0.5, 0.0],
[1.0, 1.0]],
dtype=np.float32)
self.assertAllClose(ioa21, expected_ioa21)
def test_scale(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
boxlist_scaled = np_box_list_ops.scale(boxlist, 2.0, 3.0)
expected_boxlist_scaled = np_box_list.BoxList(
np.array(
[[0.5, 0.75, 1.5, 2.25], [0.0, 0.0, 1.0, 2.25]], dtype=np.float32))
self.assertAllClose(expected_boxlist_scaled.get(), boxlist_scaled.get())
def test_clip_to_window(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
[-0.2, -0.3, 0.7, 1.5]],
dtype=np.float32))
boxlist_clipped = np_box_list_ops.clip_to_window(boxlist,
[0.0, 0.0, 1.0, 1.0])
expected_boxlist_clipped = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
[0.0, 0.0, 0.7, 1.0]],
dtype=np.float32))
self.assertAllClose(expected_boxlist_clipped.get(), boxlist_clipped.get())
def test_prune_outside_window(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
[-0.2, -0.3, 0.7, 1.5]],
dtype=np.float32))
boxlist_pruned, _ = np_box_list_ops.prune_outside_window(
boxlist, [0.0, 0.0, 1.0, 1.0])
expected_boxlist_pruned = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
self.assertAllClose(expected_boxlist_pruned.get(), boxlist_pruned.get())
def test_concatenate(self):
boxlist1 = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
boxlist2 = np_box_list.BoxList(
np.array(
[[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]], dtype=np.float32))
boxlists = [boxlist1, boxlist2]
boxlist_concatenated = np_box_list_ops.concatenate(boxlists)
boxlist_concatenated_expected = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
dtype=np.float32))
self.assertAllClose(boxlist_concatenated_expected.get(),
boxlist_concatenated.get())
def test_change_coordinate_frame(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
boxlist_coord = np_box_list_ops.change_coordinate_frame(
boxlist, np.array([0, 0, 0.5, 0.5], dtype=np.float32))
expected_boxlist_coord = np_box_list.BoxList(
np.array([[0.5, 0.5, 1.5, 1.5], [0, 0, 1.0, 1.5]], dtype=np.float32))
self.assertAllClose(boxlist_coord.get(), expected_boxlist_coord.get())
def test_filter_scores_greater_than(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
boxlist.add_field('scores', np.array([0.8, 0.2], np.float32))
boxlist_greater = np_box_list_ops.filter_scores_greater_than(boxlist, 0.5)
expected_boxlist_greater = np_box_list.BoxList(
np.array([[0.25, 0.25, 0.75, 0.75]], dtype=np.float32))
self.assertAllClose(boxlist_greater.get(), expected_boxlist_greater.get())
class GatherOpsTest(tf.test.TestCase):
def setUp(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.boxlist = np_box_list.BoxList(boxes)
self.boxlist.add_field('scores', np.array([0.5, 0.7, 0.9], dtype=float))
self.boxlist.add_field('labels',
np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
[0, 0, 0, 0, 1]],
dtype=int))
def test_gather_with_out_of_range_indices(self):
indices = np.array([3, 1], dtype=int)
boxlist = self.boxlist
with self.assertRaises(ValueError):
np_box_list_ops.gather(boxlist, indices)
def test_gather_with_invalid_multidimensional_indices(self):
indices = np.array([[0, 1], [1, 2]], dtype=int)
boxlist = self.boxlist
with self.assertRaises(ValueError):
np_box_list_ops.gather(boxlist, indices)
def test_gather_without_fields_specified(self):
indices = np.array([2, 0, 1], dtype=int)
boxlist = self.boxlist
subboxlist = np_box_list_ops.gather(boxlist, indices)
expected_scores = np.array([0.9, 0.5, 0.7], dtype=float)
self.assertAllClose(expected_scores, subboxlist.get_field('scores'))
expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0],
[14.0, 14.0, 15.0, 15.0]],
dtype=float)
self.assertAllClose(expected_boxes, subboxlist.get())
expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
[0, 1, 0, 0, 0]],
dtype=int)
self.assertAllClose(expected_labels, subboxlist.get_field('labels'))
def test_gather_with_invalid_field_specified(self):
indices = np.array([2, 0, 1], dtype=int)
boxlist = self.boxlist
with self.assertRaises(ValueError):
np_box_list_ops.gather(boxlist, indices, 'labels')
with self.assertRaises(ValueError):
np_box_list_ops.gather(boxlist, indices, ['objectness'])
def test_gather_with_fields_specified(self):
indices = np.array([2, 0, 1], dtype=int)
boxlist = self.boxlist
subboxlist = np_box_list_ops.gather(boxlist, indices, ['labels'])
self.assertFalse(subboxlist.has_field('scores'))
expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0],
[14.0, 14.0, 15.0, 15.0]],
dtype=float)
self.assertAllClose(expected_boxes, subboxlist.get())
expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
[0, 1, 0, 0, 0]],
dtype=int)
self.assertAllClose(expected_labels, subboxlist.get_field('labels'))
class SortByFieldTest(tf.test.TestCase):
def setUp(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.boxlist = np_box_list.BoxList(boxes)
self.boxlist.add_field('scores', np.array([0.5, 0.9, 0.4], dtype=float))
self.boxlist.add_field('labels',
np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
[0, 0, 0, 0, 1]],
dtype=int))
def test_with_invalid_field(self):
with self.assertRaises(ValueError):
np_box_list_ops.sort_by_field(self.boxlist, 'objectness')
with self.assertRaises(ValueError):
np_box_list_ops.sort_by_field(self.boxlist, 'labels')
def test_with_invalid_sorting_order(self):
with self.assertRaises(ValueError):
np_box_list_ops.sort_by_field(self.boxlist, 'scores', 'Descending')
def test_with_descending_sorting(self):
sorted_boxlist = np_box_list_ops.sort_by_field(self.boxlist, 'scores')
expected_boxes = np.array([[14.0, 14.0, 15.0, 15.0], [3.0, 4.0, 6.0, 8.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.assertAllClose(expected_boxes, sorted_boxlist.get())
expected_scores = np.array([0.9, 0.5, 0.4], dtype=float)
self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores'))
def test_with_ascending_sorting(self):
sorted_boxlist = np_box_list_ops.sort_by_field(
self.boxlist, 'scores', np_box_list_ops.SortOrder.ASCEND)
expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0],
[3.0, 4.0, 6.0, 8.0],
[14.0, 14.0, 15.0, 15.0],],
dtype=float)
self.assertAllClose(expected_boxes, sorted_boxlist.get())
expected_scores = np.array([0.4, 0.5, 0.9], dtype=float)
self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores'))
class NonMaximumSuppressionTest(tf.test.TestCase):
def setUp(self):
self._boxes = np.array([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]],
dtype=float)
self._boxlist = np_box_list.BoxList(self._boxes)
def test_with_no_scores_field(self):
boxlist = np_box_list.BoxList(self._boxes)
max_output_size = 3
iou_threshold = 0.5
with self.assertRaises(ValueError):
np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
def test_nms_disabled_max_output_size_equals_three(self):
boxlist = np_box_list.BoxList(self._boxes)
boxlist.add_field('scores',
np.array([.9, .75, .6, .95, .2, .3], dtype=float))
max_output_size = 3
iou_threshold = 1. # No NMS
expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 0.1, 1, 1.1]],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_select_from_three_clusters(self):
boxlist = np_box_list.BoxList(self._boxes)
boxlist.add_field('scores',
np.array([.9, .75, .6, .95, .2, .3], dtype=float))
max_output_size = 3
iou_threshold = 0.5
expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_select_at_most_two_from_three_clusters(self):
boxlist = np_box_list.BoxList(self._boxes)
boxlist.add_field('scores',
np.array([.9, .75, .6, .95, .5, .3], dtype=float))
max_output_size = 2
iou_threshold = 0.5
expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1]], dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_select_at_most_thirty_from_three_clusters(self):
boxlist = np_box_list.BoxList(self._boxes)
boxlist.add_field('scores',
np.array([.9, .75, .6, .95, .5, .3], dtype=float))
max_output_size = 30
iou_threshold = 0.5
expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_select_from_ten_identical_boxes(self):
boxes = np.array(10 * [[0, 0, 1, 1]], dtype=float)
boxlist = np_box_list.BoxList(boxes)
boxlist.add_field('scores', np.array(10 * [0.8]))
iou_threshold = .5
max_output_size = 3
expected_boxes = np.array([[0, 0, 1, 1]], dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_different_iou_threshold(self):
boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80], [200, 200, 210, 300],
[200, 200, 210, 250]],
dtype=float)
boxlist = np_box_list.BoxList(boxes)
boxlist.add_field('scores', np.array([0.9, 0.8, 0.7, 0.6]))
max_output_size = 4
iou_threshold = .4
expected_boxes = np.array([[0, 0, 20, 100],
[200, 200, 210, 300],],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
iou_threshold = .5
expected_boxes = np.array([[0, 0, 20, 100], [200, 200, 210, 300],
[200, 200, 210, 250]],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
iou_threshold = .8
expected_boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80],
[200, 200, 210, 300], [200, 200, 210, 250]],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_multiclass_nms(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.2, 0.4, 0.8, 0.8], [0.4, 0.2, 0.8, 0.8], [0.6, 0.0, 1.0, 1.0]],
dtype=np.float32))
scores = np.array([[-0.2, 0.1, 0.5, -0.4, 0.3],
[0.7, -0.7, 0.6, 0.2, -0.9],
[0.4, 0.34, -0.9, 0.2, 0.31]],
dtype=np.float32)
boxlist.add_field('scores', scores)
boxlist_clean = np_box_list_ops.multi_class_non_max_suppression(
boxlist, score_thresh=0.25, iou_thresh=0.1, max_output_size=3)
scores_clean = boxlist_clean.get_field('scores')
classes_clean = boxlist_clean.get_field('classes')
boxes = boxlist_clean.get()
expected_scores = np.array([0.7, 0.6, 0.34, 0.31])
expected_classes = np.array([0, 2, 1, 4])
expected_boxes = np.array([[0.4, 0.2, 0.8, 0.8],
[0.4, 0.2, 0.8, 0.8],
[0.6, 0.0, 1.0, 1.0],
[0.6, 0.0, 1.0, 1.0]],
dtype=np.float32)
self.assertAllClose(scores_clean, expected_scores)
self.assertAllClose(classes_clean, expected_classes)
self.assertAllClose(boxes, expected_boxes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_list_test."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_box_list
class BoxListTest(tf.test.TestCase):
def test_invalid_box_data(self):
with self.assertRaises(ValueError):
np_box_list.BoxList([0, 0, 1, 1])
with self.assertRaises(ValueError):
np_box_list.BoxList(np.array([[0, 0, 1, 1]], dtype=int))
with self.assertRaises(ValueError):
np_box_list.BoxList(np.array([0, 1, 1, 3, 4], dtype=float))
with self.assertRaises(ValueError):
np_box_list.BoxList(np.array([[0, 1, 1, 3], [3, 1, 1, 5]], dtype=float))
def test_has_field_with_existing_field(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
boxlist = np_box_list.BoxList(boxes)
self.assertTrue(boxlist.has_field('boxes'))
def test_has_field_with_nonexistent_field(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
boxlist = np_box_list.BoxList(boxes)
self.assertFalse(boxlist.has_field('scores'))
def test_get_field_with_existing_field(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
boxlist = np_box_list.BoxList(boxes)
self.assertTrue(np.allclose(boxlist.get_field('boxes'), boxes))
def test_get_field_with_nonexistent_field(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
boxlist = np_box_list.BoxList(boxes)
with self.assertRaises(ValueError):
boxlist.get_field('scores')
class AddExtraFieldTest(tf.test.TestCase):
def setUp(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.boxlist = np_box_list.BoxList(boxes)
def test_add_already_existing_field(self):
with self.assertRaises(ValueError):
self.boxlist.add_field('boxes', np.array([[0, 0, 0, 1, 0]], dtype=float))
def test_add_invalid_field_data(self):
with self.assertRaises(ValueError):
self.boxlist.add_field('scores', np.array([0.5, 0.7], dtype=float))
with self.assertRaises(ValueError):
self.boxlist.add_field('scores',
np.array([0.5, 0.7, 0.9, 0.1], dtype=float))
def test_add_single_dimensional_field_data(self):
boxlist = self.boxlist
scores = np.array([0.5, 0.7, 0.9], dtype=float)
boxlist.add_field('scores', scores)
self.assertTrue(np.allclose(scores, self.boxlist.get_field('scores')))
def test_add_multi_dimensional_field_data(self):
boxlist = self.boxlist
labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
dtype=int)
boxlist.add_field('labels', labels)
self.assertTrue(np.allclose(labels, self.boxlist.get_field('labels')))
def test_get_extra_fields(self):
boxlist = self.boxlist
self.assertSameElements(boxlist.get_extra_fields(), [])
scores = np.array([0.5, 0.7, 0.9], dtype=float)
boxlist.add_field('scores', scores)
self.assertSameElements(boxlist.get_extra_fields(), ['scores'])
labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
dtype=int)
boxlist.add_field('labels', labels)
self.assertSameElements(boxlist.get_extra_fields(), ['scores', 'labels'])
def test_get_coordinates(self):
y_min, x_min, y_max, x_max = self.boxlist.get_coordinates()
expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float)
expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float)
expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float)
expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float)
self.assertTrue(np.allclose(y_min, expected_y_min))
self.assertTrue(np.allclose(x_min, expected_x_min))
self.assertTrue(np.allclose(y_max, expected_y_max))
self.assertTrue(np.allclose(x_max, expected_x_max))
def test_num_boxes(self):
boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float)
boxlist = np_box_list.BoxList(boxes)
expected_num_boxes = 2
self.assertEqual(boxlist.num_boxes(), expected_num_boxes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Operations for [N, 4] numpy arrays representing bounding boxes.
Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
def area(boxes):
"""Computes area of boxes.
Args:
boxes: Numpy array with shape [N, 4] holding N boxes
Returns:
a numpy array with shape [N] representing box areas
"""
return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
def intersection(boxes1, boxes2):
"""Compute pairwise intersection areas between boxes.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes
boxes2: a numpy array with shape [M, 4] holding M boxes
Returns:
a numpy array with shape [N, M] representing pairwise intersection areas
"""
[y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
[y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
intersect_heights = np.maximum(
np.zeros(all_pairs_max_ymin.shape),
all_pairs_min_ymax - all_pairs_max_ymin)
all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
intersect_widths = np.maximum(
np.zeros(all_pairs_max_xmin.shape),
all_pairs_min_xmax - all_pairs_max_xmin)
return intersect_heights * intersect_widths
def iou(boxes1, boxes2):
"""Computes pairwise intersection-over-union between box collections.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise iou scores.
"""
intersect = intersection(boxes1, boxes2)
area1 = area(boxes1)
area2 = area(boxes2)
union = np.expand_dims(area1, axis=1) + np.expand_dims(
area2, axis=0) - intersect
return intersect / union
def ioa(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
their intersection area over box2's area. Note that ioa is not symmetric,
that is, IOA(box1, box2) != IOA(box2, box1).
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(area(boxes2), axis=0)
return intersect / areas
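# Worked example (an illustrative sketch, not part of the original file): two
# 2x2 boxes offset by one unit in y overlap in a 1x2 strip of area 2, so
# IOU = 2 / (4 + 4 - 2) = 1/3.
example_boxes1 = np.array([[0., 0., 2., 2.]])
example_boxes2 = np.array([[1., 0., 3., 2.]])
print(intersection(example_boxes1, example_boxes2))  # [[2.]]
print(iou(example_boxes1, example_boxes2))           # [[0.33333333]]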
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.np_box_ops."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_box_ops
class BoxOpsTests(tf.test.TestCase):
def setUp(self):
boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
dtype=float)
boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.boxes1 = boxes1
self.boxes2 = boxes2
def testArea(self):
areas = np_box_ops.area(self.boxes1)
expected_areas = np.array([6.0, 5.0], dtype=float)
self.assertAllClose(expected_areas, areas)
def testIntersection(self):
intersection = np_box_ops.intersection(self.boxes1, self.boxes2)
expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]],
dtype=float)
self.assertAllClose(intersection, expected_intersection)
def testIOU(self):
iou = np_box_ops.iou(self.boxes1, self.boxes2)
expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0],
[1.0 / 16.0, 0.0, 5.0 / 400.0]],
dtype=float)
self.assertAllClose(iou, expected_iou)
def testIOA(self):
boxes1 = np.array([[0.25, 0.25, 0.75, 0.75],
[0.0, 0.0, 0.5, 0.75]],
dtype=np.float32)
boxes2 = np.array([[0.5, 0.25, 1.0, 1.0],
[0.0, 0.0, 1.0, 1.0]],
dtype=np.float32)
ioa21 = np_box_ops.ioa(boxes2, boxes1)
expected_ioa21 = np.array([[0.5, 0.0],
[1.0, 1.0]],
dtype=np.float32)
self.assertAllClose(ioa21, expected_ioa21)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""object_detection_evaluation module.
ObjectDetectionEvaluation is a class that manages the ground truth information
of an object detection dataset and computes frequently used detection metrics,
such as Precision, Recall, and CorLoc, for the provided detection results.
It supports the following operations:
1) Add ground truth information of images sequentially.
2) Add detection result of images sequentially.
3) Evaluate detection metrics on already inserted detection results.
4) Write evaluation result into a pickle file for future processing or
visualization.
Note: This module operates on numpy boxes and box lists.
"""
import logging
import numpy as np
from object_detection.utils import metrics
from object_detection.utils import per_image_evaluation
class ObjectDetectionEvaluation(object):
"""Evaluate Object Detection Result."""
def __init__(self,
num_groundtruth_classes,
matching_iou_threshold=0.5,
nms_iou_threshold=1.0,
nms_max_output_boxes=10000):
self.per_image_eval = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
nms_max_output_boxes)
self.num_class = num_groundtruth_classes
self.groundtruth_boxes = {}
self.groundtruth_class_labels = {}
self.groundtruth_is_difficult_list = {}
self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)
self.detection_keys = set()
self.scores_per_class = [[] for _ in range(self.num_class)]
self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
self.average_precision_per_class = np.empty(self.num_class, dtype=float)
self.average_precision_per_class.fill(np.nan)
self.precisions_per_class = []
self.recalls_per_class = []
self.corloc_per_class = np.ones(self.num_class, dtype=float)
def clear_detections(self):
self.detection_keys = set()
self.scores_per_class = [[] for _ in range(self.num_class)]
self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
self.average_precision_per_class = np.zeros(self.num_class, dtype=float)
self.precisions_per_class = []
self.recalls_per_class = []
self.corloc_per_class = np.ones(self.num_class, dtype=float)
def add_single_ground_truth_image_info(self,
image_key,
groundtruth_boxes,
groundtruth_class_labels,
groundtruth_is_difficult_list=None):
"""Add ground truth info of a single image into the evaluation database.
Args:
image_key: sha256 key of image content
groundtruth_boxes: A numpy array of shape [M, 4] representing object box
coordinates[y_min, x_min, y_max, x_max]
groundtruth_class_labels: A 1-d numpy array of length M representing class
labels
groundtruth_is_difficult_list: A length M numpy boolean array denoting
whether a ground truth box is a difficult instance or not. To support
the case that no boxes are difficult, it is by default set as None.
"""
if image_key in self.groundtruth_boxes:
logging.warning(
'image %s has already been added to the ground truth database.',
image_key)
return
self.groundtruth_boxes[image_key] = groundtruth_boxes
self.groundtruth_class_labels[image_key] = groundtruth_class_labels
if groundtruth_is_difficult_list is None:
num_boxes = groundtruth_boxes.shape[0]
groundtruth_is_difficult_list = np.zeros(num_boxes, dtype=bool)
self.groundtruth_is_difficult_list[
image_key] = groundtruth_is_difficult_list.astype(dtype=bool)
self._update_ground_truth_statistics(groundtruth_class_labels,
groundtruth_is_difficult_list)
def add_single_detected_image_info(self, image_key, detected_boxes,
detected_scores, detected_class_labels):
"""Add detected result of a single image into the evaluation database.
Args:
image_key: sha256 key of image content
detected_boxes: A numpy array of shape [N, 4] representing detected box
coordinates[y_min, x_min, y_max, x_max]
detected_scores: A 1-d numpy array of length N representing classification
scores
detected_class_labels: A 1-d numpy array of length N representing class
labels
Raises:
ValueError: if detected_boxes, detected_scores and detected_class_labels
do not have the same length.
"""
if (len(detected_boxes) != len(detected_scores) or
len(detected_boxes) != len(detected_class_labels)):
raise ValueError('detected_boxes, detected_scores and '
                 'detected_class_labels should all have the same lengths. '
                 'Got [%d, %d, %d]' % (len(detected_boxes),
                                       len(detected_scores),
                                       len(detected_class_labels)))
if image_key in self.detection_keys:
logging.warning(
'image %s has already been added to the detection result database',
image_key)
return
self.detection_keys.add(image_key)
if image_key in self.groundtruth_boxes:
groundtruth_boxes = self.groundtruth_boxes[image_key]
groundtruth_class_labels = self.groundtruth_class_labels[image_key]
groundtruth_is_difficult_list = self.groundtruth_is_difficult_list[
image_key]
else:
groundtruth_boxes = np.empty(shape=[0, 4], dtype=float)
groundtruth_class_labels = np.array([], dtype=int)
groundtruth_is_difficult_list = np.array([], dtype=bool)
scores, tp_fp_labels, is_class_correctly_detected_in_image = (
self.per_image_eval.compute_object_detection_metrics(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_list))
for i in range(self.num_class):
self.scores_per_class[i].append(scores[i])
self.tp_fp_labels_per_class[i].append(tp_fp_labels[i])
(self.num_images_correctly_detected_per_class
) += is_class_correctly_detected_in_image
def _update_ground_truth_statistics(self, groundtruth_class_labels,
groundtruth_is_difficult_list):
"""Update grouth truth statitistics.
1. Difficult boxes are ignored when counting the number of ground truth
instances as done in Pascal VOC devkit.
2. Difficult boxes are treated as normal boxes when computing CorLoc related
statistics.
Args:
groundtruth_class_labels: An integer numpy array of length M,
representing M class labels of object instances in ground truth
groundtruth_is_difficult_list: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
"""
for class_index in range(self.num_class):
num_gt_instances = np.sum(groundtruth_class_labels[
~groundtruth_is_difficult_list] == class_index)
self.num_gt_instances_per_class[class_index] += num_gt_instances
if np.any(groundtruth_class_labels == class_index):
self.num_gt_imgs_per_class[class_index] += 1
def evaluate(self):
"""Compute evaluation result.
Returns:
average_precision_per_class: float numpy array of average precision for
each class.
mean_ap: mean average precision of all classes, float scalar
precisions_per_class: List of precisions, each precision is a float numpy
array
recalls_per_class: List of recalls, each recall is a float numpy array
corloc_per_class: numpy float array
mean_corloc: mean CorLoc score over all classes, float scalar
"""
if (self.num_gt_instances_per_class == 0).any():
logging.warning(
'The following classes have no ground truth examples: %s',
np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)))
for class_index in range(self.num_class):
if self.num_gt_instances_per_class[class_index] == 0:
continue
scores = np.concatenate(self.scores_per_class[class_index])
tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index])
precision, recall = metrics.compute_precision_recall(
scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])
self.precisions_per_class.append(precision)
self.recalls_per_class.append(recall)
average_precision = metrics.compute_average_precision(precision, recall)
self.average_precision_per_class[class_index] = average_precision
self.corloc_per_class = metrics.compute_cor_loc(
self.num_gt_imgs_per_class,
self.num_images_correctly_detected_per_class)
mean_ap = np.nanmean(self.average_precision_per_class)
mean_corloc = np.nanmean(self.corloc_per_class)
return (self.average_precision_per_class, mean_ap,
self.precisions_per_class, self.recalls_per_class,
self.corloc_per_class, mean_corloc)
def get_eval_result(self):
return EvalResult(self.average_precision_per_class,
self.precisions_per_class, self.recalls_per_class,
self.corloc_per_class)
class EvalResult(object):
def __init__(self, average_precisions, precisions, recalls, all_corloc):
self.precisions = precisions
self.recalls = recalls
self.all_corloc = all_corloc
self.average_precisions = average_precisions
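# A minimal usage sketch (not part of the original file): one class, one
# image, and a single detection that exactly matches the ground truth box;
# all keys and values are made up for demonstration.
example_evaluator = ObjectDetectionEvaluation(num_groundtruth_classes=1)
example_evaluator.add_single_ground_truth_image_info(
    'img1',
    groundtruth_boxes=np.array([[0., 0., 1., 1.]], dtype=float),
    groundtruth_class_labels=np.array([0], dtype=int))
example_evaluator.add_single_detected_image_info(
    'img1',
    detected_boxes=np.array([[0., 0., 1., 1.]], dtype=float),
    detected_scores=np.array([0.9], dtype=float),
    detected_class_labels=np.array([0], dtype=int))
(_, example_mean_ap, _, _, _,
 example_mean_corloc) = example_evaluator.evaluate()
print(example_mean_ap, example_mean_corloc)  # expected: 1.0 1.0 here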
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.object_detection_evaluation."""
import numpy as np
import tensorflow as tf
from object_detection.utils import object_detection_evaluation
class ObjectDetectionEvaluationTest(tf.test.TestCase):
def setUp(self):
num_groundtruth_classes = 3
self.od_eval = object_detection_evaluation.ObjectDetectionEvaluation(
num_groundtruth_classes)
image_key1 = "img1"
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
self.od_eval.add_single_ground_truth_image_info(
image_key1, groundtruth_boxes1, groundtruth_class_labels1)
image_key2 = "img2"
groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
[10, 10, 12, 12]], dtype=float)
groundtruth_class_labels2 = np.array([0, 0, 2], dtype=int)
groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
self.od_eval.add_single_ground_truth_image_info(
image_key2, groundtruth_boxes2, groundtruth_class_labels2,
groundtruth_is_difficult_list2)
image_key3 = "img3"
groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_class_labels3 = np.array([1], dtype=int)
self.od_eval.add_single_ground_truth_image_info(
image_key3, groundtruth_boxes3, groundtruth_class_labels3)
image_key = "img2"
detected_boxes = np.array(
[[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
dtype=float)
detected_class_labels = np.array([0, 0, 2], dtype=int)
detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
self.od_eval.add_single_detected_image_info(
image_key, detected_boxes, detected_scores, detected_class_labels)
def test_add_single_ground_truth_image_info(self):
expected_num_gt_instances_per_class = np.array([3, 1, 2], dtype=int)
expected_num_gt_imgs_per_class = np.array([2, 1, 2], dtype=int)
self.assertTrue(np.array_equal(expected_num_gt_instances_per_class,
self.od_eval.num_gt_instances_per_class))
self.assertTrue(np.array_equal(expected_num_gt_imgs_per_class,
self.od_eval.num_gt_imgs_per_class))
groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
[10, 10, 12, 12]], dtype=float)
self.assertTrue(np.allclose(self.od_eval.groundtruth_boxes["img2"],
groundtruth_boxes2))
groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
self.assertTrue(np.allclose(
self.od_eval.groundtruth_is_difficult_list["img2"],
groundtruth_is_difficult_list2))
groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
self.assertTrue(np.array_equal(self.od_eval.groundtruth_class_labels[
"img1"], groundtruth_class_labels1))
def test_add_single_detected_image_info(self):
expected_scores_per_class = [[np.array([0.8, 0.7], dtype=float)], [],
[np.array([0.9], dtype=float)]]
expected_tp_fp_labels_per_class = [[np.array([0, 1], dtype=bool)], [],
[np.array([0], dtype=bool)]]
expected_num_images_correctly_detected_per_class = np.array([0, 0, 0],
dtype=int)
for i in range(self.od_eval.num_class):
for j in range(len(expected_scores_per_class[i])):
self.assertTrue(np.allclose(expected_scores_per_class[i][j],
self.od_eval.scores_per_class[i][j]))
self.assertTrue(np.array_equal(expected_tp_fp_labels_per_class[i][
j], self.od_eval.tp_fp_labels_per_class[i][j]))
self.assertTrue(np.array_equal(
expected_num_images_correctly_detected_per_class,
self.od_eval.num_images_correctly_detected_per_class))
def test_evaluate(self):
(average_precision_per_class, mean_ap, precisions_per_class,
recalls_per_class, corloc_per_class,
mean_corloc) = self.od_eval.evaluate()
expected_precisions_per_class = [np.array([0, 0.5], dtype=float),
np.array([], dtype=float),
np.array([0], dtype=float)]
expected_recalls_per_class = [
np.array([0, 1. / 3.], dtype=float), np.array([], dtype=float),
np.array([0], dtype=float)
]
expected_average_precision_per_class = np.array([1. / 6., 0, 0],
dtype=float)
expected_corloc_per_class = np.array([0, np.divide(0, 0), 0], dtype=float)
expected_mean_ap = 1. / 18
expected_mean_corloc = 0.0
for i in range(self.od_eval.num_class):
self.assertTrue(np.allclose(expected_precisions_per_class[i],
precisions_per_class[i]))
self.assertTrue(np.allclose(expected_recalls_per_class[i],
recalls_per_class[i]))
self.assertTrue(np.allclose(expected_average_precision_per_class,
average_precision_per_class))
self.assertTrue(np.allclose(expected_corloc_per_class, corloc_per_class))
self.assertAlmostEqual(expected_mean_ap, mean_ap)
self.assertAlmostEqual(expected_mean_corloc, mean_corloc)
if __name__ == "__main__":
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A module for helper tensorflow ops."""
import math
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields
from object_detection.utils import static_shape
def expanded_shape(orig_shape, start_dim, num_dims):
"""Inserts multiple ones into a shape vector.
Inserts an all-1 vector of length num_dims at position start_dim into a shape.
Can be combined with tf.reshape to generalize tf.expand_dims.
Args:
orig_shape: the shape into which the all-1 vector is added (int32 vector)
start_dim: insertion position (int scalar)
num_dims: length of the inserted all-1 vector (int scalar)
Returns:
An int32 vector of length tf.size(orig_shape) + num_dims.
"""
with tf.name_scope('ExpandedShape'):
start_dim = tf.expand_dims(start_dim, 0) # scalar to rank-1
before = tf.slice(orig_shape, [0], start_dim)
add_shape = tf.ones(tf.reshape(num_dims, [1]), dtype=tf.int32)
after = tf.slice(orig_shape, start_dim, [-1])
new_shape = tf.concat([before, add_shape, after], 0)
return new_shape
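# Illustrative sketch (not part of the original module): inserting two 1s at
# position 1 of the shape [2, 3] yields [2, 1, 1, 3]; reshaping a tensor to
# this shape generalizes tf.expand_dims. Assuming a TF1 graph/session setup:
#   shape = tf.constant([2, 3], dtype=tf.int32)
#   with tf.Session() as sess:
#     print(sess.run(expanded_shape(shape, 1, 2)))  # => [2 1 1 3]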
def normalized_to_image_coordinates(normalized_boxes, image_shape,
parallel_iterations=32):
"""Converts a batch of boxes from normal to image coordinates.
Args:
normalized_boxes: a float32 tensor of shape [None, num_boxes, 4] in
normalized coordinates.
    image_shape: an int32 tensor of shape [4] containing the input image
      shape.
parallel_iterations: parallelism for the map_fn op.
Returns:
    absolute_boxes: a float32 tensor of shape [None, num_boxes, 4] containing
      the boxes in image coordinates.
"""
def _to_absolute_coordinates(normalized_boxes):
return box_list_ops.to_absolute_coordinates(
box_list.BoxList(normalized_boxes),
image_shape[1], image_shape[2], check_range=False).get()
absolute_boxes = tf.map_fn(
_to_absolute_coordinates,
elems=(normalized_boxes),
dtype=tf.float32,
parallel_iterations=parallel_iterations,
back_prop=True)
return absolute_boxes
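# Illustrative sketch (mirrors the unit test for this op): with an image of
# shape [1, 4, 4, 3], the normalized box [0.5, 0.5, 1.0, 1.0] maps to the
# absolute box [2, 2, 4, 4]:
#   boxes = tf.constant([[[0.5, 0.5, 1.0, 1.0]]], dtype=tf.float32)
#   image_shape = tf.constant([1, 4, 4, 3], dtype=tf.int32)
#   with tf.Session() as sess:
#     print(sess.run(normalized_to_image_coordinates(boxes, image_shape)))
#     # => [[[2. 2. 4. 4.]]]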
def meshgrid(x, y):
"""Tiles the contents of x and y into a pair of grids.
Multidimensional analog of numpy.meshgrid, giving the same behavior if x and y
are vectors. Generally, this will give:
  xgrid(i_1, ..., i_m, j_1, ..., j_n) = x(j_1, ..., j_n)
  ygrid(i_1, ..., i_m, j_1, ..., j_n) = y(i_1, ..., i_m)
  Keep in mind that the order of the arguments and outputs is reversed relative
  to the order of the indices they go into, done for compatibility with numpy.
The output tensors have the same shapes. Specifically:
xgrid.get_shape() = y.get_shape().concatenate(x.get_shape())
ygrid.get_shape() = y.get_shape().concatenate(x.get_shape())
Args:
x: A tensor of arbitrary shape and rank. xgrid will contain these values
varying in its last dimensions.
y: A tensor of arbitrary shape and rank. ygrid will contain these values
varying in its first dimensions.
Returns:
A tuple of tensors (xgrid, ygrid).
"""
with tf.name_scope('Meshgrid'):
x = tf.convert_to_tensor(x)
y = tf.convert_to_tensor(y)
x_exp_shape = expanded_shape(tf.shape(x), 0, tf.rank(y))
y_exp_shape = expanded_shape(tf.shape(y), tf.rank(y), tf.rank(x))
xgrid = tf.tile(tf.reshape(x, x_exp_shape), y_exp_shape)
ygrid = tf.tile(tf.reshape(y, y_exp_shape), x_exp_shape)
new_shape = y.get_shape().concatenate(x.get_shape())
xgrid.set_shape(new_shape)
ygrid.set_shape(new_shape)
return xgrid, ygrid
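# Illustrative sketch (not part of the original module): for 1-D inputs this
# matches numpy.meshgrid. With x = [0, 1, 2] and y = [0, 1], both outputs have
# shape [2, 3]:
#   xgrid = [[0, 1, 2],        ygrid = [[0, 0, 0],
#            [0, 1, 2]]                 [1, 1, 1]]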
def pad_to_multiple(tensor, multiple):
"""Returns the tensor zero padded to the specified multiple.
Appends 0s to the end of the first and second dimension (height and width) of
the tensor until both dimensions are a multiple of the input argument
  'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1] and an input
  multiple of 4, pad_to_multiple will append 0s so that the resulting tensor
  will be of shape [1, 4, 8, 1].
Args:
tensor: rank 4 float32 tensor, where
tensor -> [batch_size, height, width, channels].
multiple: the multiple to pad to.
Returns:
padded_tensor: the tensor zero padded to the specified multiple.
"""
tensor_shape = tensor.get_shape()
batch_size = static_shape.get_batch_size(tensor_shape)
tensor_height = static_shape.get_height(tensor_shape)
tensor_width = static_shape.get_width(tensor_shape)
tensor_depth = static_shape.get_depth(tensor_shape)
if batch_size is None:
batch_size = tf.shape(tensor)[0]
if tensor_height is None:
tensor_height = tf.shape(tensor)[1]
padded_tensor_height = tf.to_int32(
tf.ceil(tf.to_float(tensor_height) / tf.to_float(multiple))) * multiple
else:
padded_tensor_height = int(
math.ceil(float(tensor_height) / multiple) * multiple)
if tensor_width is None:
tensor_width = tf.shape(tensor)[2]
padded_tensor_width = tf.to_int32(
tf.ceil(tf.to_float(tensor_width) / tf.to_float(multiple))) * multiple
else:
padded_tensor_width = int(
math.ceil(float(tensor_width) / multiple) * multiple)
if tensor_depth is None:
tensor_depth = tf.shape(tensor)[3]
# Use tf.concat instead of tf.pad to preserve static shape
height_pad = tf.zeros([
batch_size, padded_tensor_height - tensor_height, tensor_width,
tensor_depth
])
padded_tensor = tf.concat([tensor, height_pad], 1)
width_pad = tf.zeros([
batch_size, padded_tensor_height, padded_tensor_width - tensor_width,
tensor_depth
])
padded_tensor = tf.concat([padded_tensor, width_pad], 2)
return padded_tensor
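# Illustrative sketch (matches the example in the docstring above): the height
# and width dimensions are rounded up to the next multiple of 4, so a
# [1, 3, 5, 1] tensor becomes [1, 4, 8, 1]:
#   padded = pad_to_multiple(tf.zeros([1, 3, 5, 1]), 4)
#   print(padded.get_shape())  # => (1, 4, 8, 1)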
def padded_one_hot_encoding(indices, depth, left_pad):
"""Returns a zero padded one-hot tensor.
  This function converts a sparse representation of indices (e.g., [3]) to a
  zero padded one-hot representation (e.g., [0, 0, 0, 0, 1] with depth = 4 and
  left_pad = 1). If `indices` is empty, the result will simply be a tensor of
  shape (0, depth + left_pad). If depth = 0, then this function just returns
  `None`.
Args:
indices: an integer tensor of shape [num_indices].
depth: depth for the one-hot tensor (integer).
left_pad: number of zeros to left pad the one-hot tensor with (integer).
Returns:
padded_onehot: a tensor with shape (num_indices, depth + left_pad). Returns
`None` if the depth is zero.
Raises:
    ValueError: if `indices` does not have rank 1 or if `left_pad` or `depth`
      are either negative or non-integers.
TODO: add runtime checks for depth and indices.
"""
if depth < 0 or not isinstance(depth, (int, long)):
raise ValueError('`depth` must be a non-negative integer.')
if left_pad < 0 or not isinstance(left_pad, (int, long)):
raise ValueError('`left_pad` must be a non-negative integer.')
if depth == 0:
return None
if len(indices.get_shape().as_list()) != 1:
raise ValueError('`indices` must have rank 1')
def one_hot_and_pad():
one_hot = tf.cast(tf.one_hot(tf.cast(indices, tf.int64), depth,
on_value=1, off_value=0), tf.float32)
return tf.pad(one_hot, [[0, 0], [left_pad, 0]], mode='CONSTANT')
result = tf.cond(tf.greater(tf.size(indices), 0), one_hot_and_pad,
lambda: tf.zeros((depth + left_pad, 0)))
return tf.reshape(result, [-1, depth + left_pad])
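# Illustrative sketch (not part of the original module): with depth=4 and
# left_pad=1, index 3 encodes to the padded one-hot row [0, 0, 0, 0, 1]:
#   onehot = padded_one_hot_encoding(tf.constant([3]), depth=4, left_pad=1)
#   with tf.Session() as sess:
#     print(sess.run(onehot))  # => [[0. 0. 0. 0. 1.]]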
def dense_to_sparse_boxes(dense_locations, dense_num_boxes, num_classes):
"""Converts bounding boxes from dense to sparse form.
Args:
dense_locations: a [max_num_boxes, 4] tensor in which only the first k rows
are valid bounding box location coordinates, where k is the sum of
elements in dense_num_boxes.
dense_num_boxes: a [max_num_classes] tensor indicating the counts of
various bounding box classes e.g. [1, 0, 0, 2] means that the first
bounding box is of class 0 and the second and third bounding boxes are
of class 3. The sum of elements in this tensor is the number of valid
bounding boxes.
num_classes: number of classes
Returns:
box_locations: a [num_boxes, 4] tensor containing only valid bounding
boxes (i.e. the first num_boxes rows of dense_locations)
box_classes: a [num_boxes] tensor containing the classes of each bounding
box (e.g. dense_num_boxes = [1, 0, 0, 2] => box_classes = [0, 3, 3]
"""
num_valid_boxes = tf.reduce_sum(dense_num_boxes)
box_locations = tf.slice(dense_locations,
tf.constant([0, 0]), tf.stack([num_valid_boxes, 4]))
tiled_classes = [tf.tile([i], tf.expand_dims(dense_num_boxes[i], 0))
for i in range(num_classes)]
box_classes = tf.concat(tiled_classes, 0)
box_locations.set_shape([None, 4])
return box_locations, box_classes
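# Illustrative sketch (not part of the original module): with
# dense_num_boxes = [1, 0, 0, 2], the first three rows of dense_locations are
# kept and labeled with classes [0, 3, 3]:
#   locations = tf.random_uniform([10, 4])
#   counts = tf.constant([1, 0, 0, 2], dtype=tf.int32)
#   box_locations, box_classes = dense_to_sparse_boxes(locations, counts, 4)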
def indices_to_dense_vector(indices,
size,
indices_value=1.,
default_value=0,
dtype=tf.float32):
"""Creates dense vector with indices set to specific value and rest to zeros.
This function exists because it is unclear if it is safe to use
tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
with indices which are not ordered.
This function accepts a dynamic size (e.g. tf.shape(tensor)[0])
Args:
indices: 1d Tensor with integer indices which are to be set to
indices_values.
size: scalar with size (integer) of output Tensor.
indices_value: values of elements specified by indices in the output vector
default_value: values of other elements in the output vector.
dtype: data type.
Returns:
dense 1D Tensor of shape [size] with indices set to indices_values and the
rest set to default_value.
"""
size = tf.to_int32(size)
zeros = tf.ones([size], dtype=dtype) * default_value
values = tf.ones_like(indices, dtype=dtype) * indices_value
return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)],
[zeros, values])
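# Illustrative sketch (not part of the original module): scattering 1.0 into
# positions [0, 2] of a length-4 vector:
#   v = indices_to_dense_vector(tf.constant([0, 2]), size=4)
#   with tf.Session() as sess:
#     print(sess.run(v))  # => [1. 0. 1. 0.]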
def retain_groundtruth(tensor_dict, valid_indices):
"""Retains groundtruth by valid indices.
Args:
tensor_dict: a dictionary of following groundtruth tensors -
fields.InputDataFields.groundtruth_boxes
fields.InputDataFields.groundtruth_classes
fields.InputDataFields.groundtruth_is_crowd
fields.InputDataFields.groundtruth_area
fields.InputDataFields.groundtruth_label_types
fields.InputDataFields.groundtruth_difficult
valid_indices: a tensor with valid indices for the box-level groundtruth.
Returns:
a dictionary of tensors containing only the groundtruth for valid_indices.
Raises:
ValueError: If the shape of valid_indices is invalid.
ValueError: field fields.InputDataFields.groundtruth_boxes is
not present in tensor_dict.
"""
input_shape = valid_indices.get_shape().as_list()
if not (len(input_shape) == 1 or
(len(input_shape) == 2 and input_shape[1] == 1)):
raise ValueError('The shape of valid_indices is invalid.')
valid_indices = tf.reshape(valid_indices, [-1])
valid_dict = {}
if fields.InputDataFields.groundtruth_boxes in tensor_dict:
# Prevents reshape failure when num_boxes is 0.
num_boxes = tf.maximum(tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0], 1)
for key in tensor_dict:
if key in [fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes]:
valid_dict[key] = tf.gather(tensor_dict[key], valid_indices)
# Input decoder returns empty tensor when these fields are not provided.
# Needs to reshape into [num_boxes, -1] for tf.gather() to work.
elif key in [fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_area,
fields.InputDataFields.groundtruth_difficult,
fields.InputDataFields.groundtruth_label_types]:
valid_dict[key] = tf.reshape(
tf.gather(tf.reshape(tensor_dict[key], [num_boxes, -1]),
valid_indices), [-1])
# Fields that are not associated with boxes.
else:
valid_dict[key] = tensor_dict[key]
else:
raise ValueError('%s not present in input tensor dict.' % (
fields.InputDataFields.groundtruth_boxes))
return valid_dict
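# Illustrative sketch (not part of the original module; the dict keys follow
# the fields listed in the docstring above): keeping only the first box and
# its class label:
#   tensors = {
#       fields.InputDataFields.groundtruth_boxes:
#           tf.constant([[0., 0., 1., 1.], [0., 0., .5, .5]]),
#       fields.InputDataFields.groundtruth_classes: tf.constant([1, 2]),
#   }
#   filtered = retain_groundtruth(tensors, tf.constant([0], dtype=tf.int32))
#   # groundtruth_boxes -> [[0., 0., 1., 1.]]; groundtruth_classes -> [1]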
def retain_groundtruth_with_positive_classes(tensor_dict):
"""Retains only groundtruth with positive class ids.
Args:
tensor_dict: a dictionary of following groundtruth tensors -
fields.InputDataFields.groundtruth_boxes
fields.InputDataFields.groundtruth_classes
fields.InputDataFields.groundtruth_is_crowd
fields.InputDataFields.groundtruth_area
fields.InputDataFields.groundtruth_label_types
fields.InputDataFields.groundtruth_difficult
Returns:
a dictionary of tensors containing only the groundtruth with positive
classes.
Raises:
ValueError: If groundtruth_classes tensor is not in tensor_dict.
"""
if fields.InputDataFields.groundtruth_classes not in tensor_dict:
    raise ValueError('`groundtruth_classes` not in tensor_dict.')
keep_indices = tf.where(tf.greater(
tensor_dict[fields.InputDataFields.groundtruth_classes], 0))
return retain_groundtruth(tensor_dict, keep_indices)
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
"""Filters out groundtruth with no bounding boxes.
Args:
tensor_dict: a dictionary of following groundtruth tensors -
fields.InputDataFields.groundtruth_boxes
fields.InputDataFields.groundtruth_classes
fields.InputDataFields.groundtruth_is_crowd
fields.InputDataFields.groundtruth_area
fields.InputDataFields.groundtruth_label_types
Returns:
a dictionary of tensors containing only the groundtruth that have bounding
boxes.
"""
groundtruth_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
nan_indicator_vector = tf.greater(tf.reduce_sum(tf.to_int32(
tf.is_nan(groundtruth_boxes)), reduction_indices=[1]), 0)
valid_indicator_vector = tf.logical_not(nan_indicator_vector)
valid_indices = tf.where(valid_indicator_vector)
return retain_groundtruth(tensor_dict, valid_indices)
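# Illustrative sketch (not part of the original module; assumes numpy is
# imported as np in the caller): rows whose coordinates contain NaN are
# dropped together with their associated fields:
#   tensors = {
#       fields.InputDataFields.groundtruth_boxes:
#           tf.constant([[np.nan] * 4, [0.2, 0.4, 0.6, 0.8]]),
#       fields.InputDataFields.groundtruth_classes: tf.constant([1, 2]),
#   }
#   filtered = filter_groundtruth_with_nan_box_coordinates(tensors)
#   # keeps only the second box (class 2).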
def normalize_to_target(inputs,
target_norm_value,
dim,
epsilon=1e-7,
trainable=True,
scope='NormalizeToTarget',
summarize=True):
"""L2 normalizes the inputs across the specified dimension to a target norm.
This op implements the L2 Normalization layer introduced in
Liu, Wei, et al. "SSD: Single Shot MultiBox Detector."
and Liu, Wei, Andrew Rabinovich, and Alexander C. Berg.
"Parsenet: Looking wider to see better." and is useful for bringing
activations from multiple layers in a convnet to a standard scale.
Note that the rank of `inputs` must be known and the dimension to which
normalization is to be applied should be statically defined.
TODO: Add option to scale by L2 norm of the entire input.
Args:
inputs: A `Tensor` of arbitrary size.
target_norm_value: A float value that specifies an initial target norm or
a list of floats (whose length must be equal to the depth along the
dimension to be normalized) specifying a per-dimension multiplier
after normalization.
dim: The dimension along which the input is normalized.
epsilon: A small value to add to the inputs to avoid dividing by zero.
trainable: Whether the norm is trainable or not
scope: Optional scope for variable_scope.
summarize: Whether or not to add a tensorflow summary for the op.
Returns:
The input tensor normalized to the specified target norm.
Raises:
    ValueError: If dim is negative or not smaller than the rank of `inputs`.
ValueError: If target_norm_value is not a float or a list of floats with
length equal to the depth along the dimension to be normalized.
"""
with tf.variable_scope(scope, 'NormalizeToTarget', [inputs]):
if not inputs.get_shape():
raise ValueError('The input rank must be known.')
input_shape = inputs.get_shape().as_list()
input_rank = len(input_shape)
if dim < 0 or dim >= input_rank:
raise ValueError(
'dim must be non-negative but smaller than the input rank.')
if not input_shape[dim]:
raise ValueError('input shape should be statically defined along '
'the specified dimension.')
depth = input_shape[dim]
if not (isinstance(target_norm_value, float) or
(isinstance(target_norm_value, list) and
len(target_norm_value) == depth) and
all([isinstance(val, float) for val in target_norm_value])):
raise ValueError('target_norm_value must be a float or a list of floats '
'with length equal to the depth along the dimension to '
'be normalized.')
if isinstance(target_norm_value, float):
initial_norm = depth * [target_norm_value]
else:
initial_norm = target_norm_value
target_norm = tf.contrib.framework.model_variable(
name='weights', dtype=tf.float32,
initializer=tf.constant(initial_norm, dtype=tf.float32),
trainable=trainable)
if summarize:
mean = tf.reduce_mean(target_norm)
mean = tf.Print(mean, ['NormalizeToTarget:', mean])
tf.summary.scalar(tf.get_variable_scope().name, mean)
lengths = epsilon + tf.sqrt(tf.reduce_sum(tf.square(inputs), dim, True))
mult_shape = input_rank*[1]
mult_shape[dim] = depth
return tf.reshape(target_norm, mult_shape) * tf.truediv(inputs, lengths)
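# Illustrative sketch (mirrors the unit test for this op): normalizing the
# channel vector [3, 4] (L2 norm 5) to a target norm of 10 initially yields
# [6, 8]; the per-channel 'weights' variable is trainable by default:
#   out = normalize_to_target(tf.constant([[[[3., 4.]]]]),
#                             target_norm_value=10.0, dim=3)
#   with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     print(sess.run(out))  # ~> [[[[6. 8.]]]]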
def position_sensitive_crop_regions(image,
boxes,
box_ind,
crop_size,
num_spatial_bins,
global_pool,
extrapolation_value=None):
"""Position-sensitive crop and pool rectangular regions from a feature grid.
The output crops are split into `spatial_bins_y` vertical bins
and `spatial_bins_x` horizontal bins. For each intersection of a vertical
and a horizontal bin the output values are gathered by performing
  `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
channels of the image. This reduces `depth` by a factor of
`(spatial_bins_y * spatial_bins_x)`.
When global_pool is True, this function implements a differentiable version
of position-sensitive RoI pooling used in
[R-FCN detection system](https://arxiv.org/abs/1605.06409).
When global_pool is False, this function implements a differentiable version
of position-sensitive assembling operation used in
[instance FCN](https://arxiv.org/abs/1603.08678).
Args:
image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
`int16`, `int32`, `int64`, `half`, `float32`, `float64`.
A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
Both `image_height` and `image_width` need to be positive.
boxes: A `Tensor` of type `float32`.
A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
specifies the coordinates of a box in the `box_ind[i]` image and is
specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
coordinate value of `y` is mapped to the image coordinate at
      `y * (image_height - 1)`, so the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height coordinates.
We do allow y1 > y2, in which case the sampled crop is an up-down flipped
version of the original image. The width dimension is treated similarly.
Normalized coordinates outside the `[0, 1]` range are allowed, in which
case we use `extrapolation_value` to extrapolate the input image values.
box_ind: A `Tensor` of type `int32`.
A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
The value of `box_ind[i]` specifies the image that the `i`-th box refers
to.
crop_size: A list of two integers `[crop_height, crop_width]`. All
cropped image patches are resized to this size. The aspect ratio of the
image content is not preserved. Both `crop_height` and `crop_width` need
to be positive.
num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
Represents the number of position-sensitive bins in y and x directions.
Both values should be >= 1. `crop_height` should be divisible by
`spatial_bins_y`, and similarly for width.
The number of image channels should be divisible by
(spatial_bins_y * spatial_bins_x).
Suggested value from R-FCN paper: [3, 3].
global_pool: A boolean variable.
If True, we perform average global pooling on the features assembled from
the position-sensitive score maps.
If False, we keep the position-pooled features without global pooling
over the spatial coordinates.
Note that using global_pool=True is equivalent to but more efficient than
running the function with global_pool=False and then performing global
average pooling.
extrapolation_value: An optional `float`. Defaults to `0`.
Value used for extrapolation, when applicable.
Returns:
position_sensitive_features: A 4-D tensor of shape
`[num_boxes, K, K, crop_channels]`,
where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
where K = 1 when global_pool is True (Average-pooled cropped regions),
and K = crop_size when global_pool is False.
Raises:
ValueError: Raised in four situations:
`num_spatial_bins` is not >= 1;
`num_spatial_bins` does not divide `crop_size`;
`(spatial_bins_y*spatial_bins_x)` does not divide `depth`;
`bin_crop_size` is not square when global_pool=False due to the
constraint in function space_to_depth.
"""
total_bins = 1
bin_crop_size = []
for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
if num_bins < 1:
raise ValueError('num_spatial_bins should be >= 1')
if crop_dim % num_bins != 0:
raise ValueError('crop_size should be divisible by num_spatial_bins')
total_bins *= num_bins
bin_crop_size.append(crop_dim / num_bins)
if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
raise ValueError('Only support square bin crop size for now.')
ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
spatial_bins_y, spatial_bins_x = num_spatial_bins
# Split each box into spatial_bins_y * spatial_bins_x bins.
position_sensitive_boxes = []
for bin_y in range(spatial_bins_y):
step_y = (ymax - ymin) / spatial_bins_y
for bin_x in range(spatial_bins_x):
step_x = (xmax - xmin) / spatial_bins_x
box_coordinates = [ymin + bin_y * step_y,
xmin + bin_x * step_x,
ymin + (bin_y + 1) * step_y,
xmin + (bin_x + 1) * step_x,
]
position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))
image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=3)
image_crops = []
for (split, box) in zip(image_splits, position_sensitive_boxes):
crop = tf.image.crop_and_resize(split, box, box_ind, bin_crop_size,
extrapolation_value=extrapolation_value)
image_crops.append(crop)
if global_pool:
# Average over all bins.
position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
# Then average over spatial positions within the bins.
position_sensitive_features = tf.reduce_mean(
position_sensitive_features, [1, 2], keep_dims=True)
else:
# Reorder height/width to depth channel.
block_size = bin_crop_size[0]
if block_size >= 2:
image_crops = [tf.space_to_depth(
crop, block_size=block_size) for crop in image_crops]
    # Pack image_crops so that first dimension is for position-sensitive boxes.
position_sensitive_features = tf.stack(image_crops, axis=0)
# Unroll the position-sensitive boxes to spatial positions.
position_sensitive_features = tf.squeeze(
tf.batch_to_space_nd(position_sensitive_features,
block_shape=[1] + num_spatial_bins,
crops=tf.zeros((3, 2), dtype=tf.int32)),
squeeze_dims=[0])
# Reorder back the depth channel.
if block_size >= 2:
position_sensitive_features = tf.depth_to_space(
position_sensitive_features, block_size=block_size)
return position_sensitive_features
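# Illustrative sketch (mirrors the unit tests for this op): for a [1, 3, 2, 6]
# feature map whose c-th channel is the constant c + 1, num_spatial_bins of
# [3, 2] assigns one channel per bin, so global average pooling over a
# whole-image box yields (1 + 2 + ... + 6) / 6 = 3.5:
#   image = tf.constant(list(range(1, 7)) * 6, dtype=tf.float32,
#                       shape=[1, 3, 2, 6])
#   out = position_sensitive_crop_regions(
#       image, tf.constant([[0., 0., 1., 1.]]),
#       tf.constant([0], dtype=tf.int32), crop_size=[3, 2],
#       num_spatial_bins=[3, 2], global_pool=True)
#   # out has shape [1, 1, 1, 1] and value 3.5.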
def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
image_width):
"""Transforms the box masks back to full image masks.
Embeds masks in bounding boxes of larger masks whose shapes correspond to
image shape.
Args:
box_masks: A tf.float32 tensor of size [num_masks, mask_height, mask_width].
boxes: A tf.float32 tensor of size [num_masks, 4] containing the box
corners. Row i contains [ymin, xmin, ymax, xmax] of the box
corresponding to mask i. Note that the box corners are in
normalized coordinates.
image_height: Image height. The output mask will have the same height as
the image height.
image_width: Image width. The output mask will have the same width as the
image width.
Returns:
A tf.float32 tensor of size [num_masks, image_height, image_width].
"""
# TODO: Make this a public function.
def transform_boxes_relative_to_boxes(boxes, reference_boxes):
boxes = tf.reshape(boxes, [-1, 2, 2])
min_corner = tf.expand_dims(reference_boxes[:, 0:2], 1)
max_corner = tf.expand_dims(reference_boxes[:, 2:4], 1)
transformed_boxes = (boxes - min_corner) / (max_corner - min_corner)
return tf.reshape(transformed_boxes, [-1, 4])
box_masks = tf.expand_dims(box_masks, axis=3)
num_boxes = tf.shape(box_masks)[0]
unit_boxes = tf.concat(
[tf.zeros([num_boxes, 2]), tf.ones([num_boxes, 2])], axis=1)
reverse_boxes = transform_boxes_relative_to_boxes(unit_boxes, boxes)
image_masks = tf.image.crop_and_resize(image=box_masks,
boxes=reverse_boxes,
box_ind=tf.range(num_boxes),
crop_size=[image_height, image_width],
extrapolation_value=0.0)
return tf.squeeze(image_masks, axis=3)
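# Illustrative sketch (not part of the original module): a 2x2 mask of ones
# for the normalized box [0.25, 0.25, 0.75, 0.75] on an 8x8 canvas yields a
# [1, 8, 8] float mask that is approximately 1 inside the central region and
# 0 elsewhere; values near the box boundary are softened by the bilinear
# resampling in tf.image.crop_and_resize:
#   image_masks = reframe_box_masks_to_image_masks(
#       tf.ones([1, 2, 2]), tf.constant([[0.25, 0.25, 0.75, 0.75]]),
#       image_height=8, image_width=8)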
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.ops."""
import numpy as np
import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.utils import ops
class NormalizedToImageCoordinatesTest(tf.test.TestCase):
def test_normalized_to_image_coordinates(self):
normalized_boxes = tf.placeholder(tf.float32, shape=(None, 1, 4))
normalized_boxes_np = np.array([[[0.0, 0.0, 1.0, 1.0]],
[[0.5, 0.5, 1.0, 1.0]]])
image_shape = tf.convert_to_tensor([1, 4, 4, 3], dtype=tf.int32)
absolute_boxes = ops.normalized_to_image_coordinates(normalized_boxes,
image_shape,
parallel_iterations=2)
expected_boxes = np.array([[[0, 0, 4, 4]],
[[2, 2, 4, 4]]])
with self.test_session() as sess:
absolute_boxes = sess.run(absolute_boxes,
feed_dict={normalized_boxes:
normalized_boxes_np})
self.assertAllEqual(absolute_boxes, expected_boxes)
class MeshgridTest(tf.test.TestCase):
def test_meshgrid_numpy_comparison(self):
"""Tests meshgrid op with vectors, for which it should match numpy."""
x = np.arange(4)
y = np.arange(6)
exp_xgrid, exp_ygrid = np.meshgrid(x, y)
xgrid, ygrid = ops.meshgrid(x, y)
with self.test_session() as sess:
xgrid_output, ygrid_output = sess.run([xgrid, ygrid])
self.assertAllEqual(xgrid_output, exp_xgrid)
self.assertAllEqual(ygrid_output, exp_ygrid)
def test_meshgrid_multidimensional(self):
np.random.seed(18)
x = np.random.rand(4, 1, 2).astype(np.float32)
y = np.random.rand(2, 3).astype(np.float32)
xgrid, ygrid = ops.meshgrid(x, y)
grid_shape = list(y.shape) + list(x.shape)
self.assertEqual(xgrid.get_shape().as_list(), grid_shape)
self.assertEqual(ygrid.get_shape().as_list(), grid_shape)
with self.test_session() as sess:
xgrid_output, ygrid_output = sess.run([xgrid, ygrid])
# Check the shape of the output grids
self.assertEqual(xgrid_output.shape, tuple(grid_shape))
self.assertEqual(ygrid_output.shape, tuple(grid_shape))
# Check a few elements
test_elements = [((3, 0, 0), (1, 2)),
((2, 0, 1), (0, 0)),
((0, 0, 0), (1, 1))]
for xind, yind in test_elements:
# These are float equality tests, but the meshgrid op should not introduce
# rounding.
self.assertEqual(xgrid_output[yind + xind], x[xind])
self.assertEqual(ygrid_output[yind + xind], y[yind])
class OpsTestPadToMultiple(tf.test.TestCase):
def test_zero_padding(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.pad_to_multiple(tensor, 1)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 2, 2, 1), padded_tensor_out.shape)
def test_no_padding(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.pad_to_multiple(tensor, 2)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 2, 2, 1), padded_tensor_out.shape)
def test_padding(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.pad_to_multiple(tensor, 4)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 4, 4, 1), padded_tensor_out.shape)
class OpsTestPaddedOneHotEncoding(tf.test.TestCase):
def test_correct_one_hot_tensor_with_no_pad(self):
indices = tf.constant([1, 2, 3, 5])
one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=0)
expected_tensor = np.array([[0, 1, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1]], np.float32)
with self.test_session() as sess:
out_one_hot_tensor = sess.run(one_hot_tensor)
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
def test_correct_one_hot_tensor_with_pad_one(self):
indices = tf.constant([1, 2, 3, 5])
one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=1)
expected_tensor = np.array([[0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 1]], np.float32)
with self.test_session() as sess:
out_one_hot_tensor = sess.run(one_hot_tensor)
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
def test_correct_one_hot_tensor_with_pad_three(self):
indices = tf.constant([1, 2, 3, 5])
one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=3)
expected_tensor = np.array([[0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1]], np.float32)
with self.test_session() as sess:
out_one_hot_tensor = sess.run(one_hot_tensor)
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
def test_correct_padded_one_hot_tensor_with_empty_indices(self):
depth = 6
pad = 2
indices = tf.constant([])
one_hot_tensor = ops.padded_one_hot_encoding(
indices, depth=depth, left_pad=pad)
expected_tensor = np.zeros((0, depth + pad))
with self.test_session() as sess:
out_one_hot_tensor = sess.run(one_hot_tensor)
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
def test_return_none_on_zero_depth(self):
indices = tf.constant([1, 2, 3, 4, 5])
one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=0, left_pad=2)
self.assertEqual(one_hot_tensor, None)
def test_raise_value_error_on_rank_two_input(self):
indices = tf.constant(1.0, shape=(2, 3))
with self.assertRaises(ValueError):
ops.padded_one_hot_encoding(indices, depth=6, left_pad=2)
def test_raise_value_error_on_negative_pad(self):
indices = tf.constant(1.0, shape=(2, 3))
with self.assertRaises(ValueError):
ops.padded_one_hot_encoding(indices, depth=6, left_pad=-1)
def test_raise_value_error_on_float_pad(self):
indices = tf.constant(1.0, shape=(2, 3))
with self.assertRaises(ValueError):
ops.padded_one_hot_encoding(indices, depth=6, left_pad=0.1)
def test_raise_value_error_on_float_depth(self):
indices = tf.constant(1.0, shape=(2, 3))
with self.assertRaises(ValueError):
ops.padded_one_hot_encoding(indices, depth=0.1, left_pad=2)
class OpsDenseToSparseBoxesTest(tf.test.TestCase):
def test_return_all_boxes_when_all_input_boxes_are_valid(self):
num_classes = 4
num_valid_boxes = 3
code_size = 4
dense_location_placeholder = tf.placeholder(tf.float32,
shape=(num_valid_boxes,
code_size))
dense_num_boxes_placeholder = tf.placeholder(tf.int32, shape=(num_classes))
box_locations, box_classes = ops.dense_to_sparse_boxes(
dense_location_placeholder, dense_num_boxes_placeholder, num_classes)
feed_dict = {dense_location_placeholder: np.random.uniform(
size=[num_valid_boxes, code_size]),
dense_num_boxes_placeholder: np.array([1, 0, 0, 2],
dtype=np.int32)}
expected_box_locations = feed_dict[dense_location_placeholder]
    expected_box_classes = np.array([0, 3, 3])
with self.test_session() as sess:
box_locations, box_classes = sess.run([box_locations, box_classes],
feed_dict=feed_dict)
self.assertAllClose(box_locations, expected_box_locations, rtol=1e-6,
atol=1e-6)
      self.assertAllEqual(box_classes, expected_box_classes)
def test_return_only_valid_boxes_when_input_contains_invalid_boxes(self):
num_classes = 4
num_valid_boxes = 3
num_boxes = 10
code_size = 4
dense_location_placeholder = tf.placeholder(tf.float32, shape=(num_boxes,
code_size))
dense_num_boxes_placeholder = tf.placeholder(tf.int32, shape=(num_classes))
box_locations, box_classes = ops.dense_to_sparse_boxes(
dense_location_placeholder, dense_num_boxes_placeholder, num_classes)
feed_dict = {dense_location_placeholder: np.random.uniform(
size=[num_boxes, code_size]),
dense_num_boxes_placeholder: np.array([1, 0, 0, 2],
dtype=np.int32)}
expected_box_locations = (feed_dict[dense_location_placeholder]
[:num_valid_boxes])
    expected_box_classes = np.array([0, 3, 3])
with self.test_session() as sess:
box_locations, box_classes = sess.run([box_locations, box_classes],
feed_dict=feed_dict)
self.assertAllClose(box_locations, expected_box_locations, rtol=1e-6,
atol=1e-6)
      self.assertAllEqual(box_classes, expected_box_classes)
class OpsTestIndicesToDenseVector(tf.test.TestCase):
def test_indices_to_dense_vector(self):
size = 10000
num_indices = np.random.randint(size)
rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
expected_output = np.zeros(size, dtype=np.float32)
expected_output[rand_indices] = 1.
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(tf_rand_indices, size)
with self.test_session() as sess:
output = sess.run(indicator)
self.assertAllEqual(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
def test_indices_to_dense_vector_size_at_inference(self):
size = 5000
num_indices = 250
all_indices = np.arange(size)
rand_indices = np.random.permutation(all_indices)[0:num_indices]
expected_output = np.zeros(size, dtype=np.float32)
expected_output[rand_indices] = 1.
tf_all_indices = tf.placeholder(tf.int32)
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(tf_rand_indices,
tf.shape(tf_all_indices)[0])
feed_dict = {tf_all_indices: all_indices}
with self.test_session() as sess:
output = sess.run(indicator, feed_dict=feed_dict)
self.assertAllEqual(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
def test_indices_to_dense_vector_int(self):
size = 500
num_indices = 25
rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
expected_output = np.zeros(size, dtype=np.int64)
expected_output[rand_indices] = 1
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(
tf_rand_indices, size, 1, dtype=tf.int64)
with self.test_session() as sess:
output = sess.run(indicator)
self.assertAllEqual(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
def test_indices_to_dense_vector_custom_values(self):
size = 100
num_indices = 10
rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
indices_value = np.random.rand(1)
default_value = np.random.rand(1)
expected_output = np.float32(np.ones(size) * default_value)
expected_output[rand_indices] = indices_value
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(
tf_rand_indices,
size,
indices_value=indices_value,
default_value=default_value)
with self.test_session() as sess:
output = sess.run(indicator)
self.assertAllClose(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
def test_indices_to_dense_vector_all_indices_as_input(self):
size = 500
num_indices = 500
rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
expected_output = np.ones(size, dtype=np.float32)
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(tf_rand_indices, size)
with self.test_session() as sess:
output = sess.run(indicator)
self.assertAllEqual(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
def test_indices_to_dense_vector_empty_indices_as_input(self):
size = 500
rand_indices = []
expected_output = np.zeros(size, dtype=np.float32)
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(tf_rand_indices, size)
with self.test_session() as sess:
output = sess.run(indicator)
self.assertAllEqual(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
class GroundtruthFilterTest(tf.test.TestCase):
def test_filter_groundtruth(self):
input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
input_classes = tf.placeholder(tf.int32, shape=(None,))
input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
input_area = tf.placeholder(tf.float32, shape=(None,))
input_difficult = tf.placeholder(tf.float32, shape=(None,))
input_label_types = tf.placeholder(tf.string, shape=(None,))
valid_indices = tf.placeholder(tf.int32, shape=(None,))
input_tensors = {
fields.InputDataFields.image: input_image,
fields.InputDataFields.groundtruth_boxes: input_boxes,
fields.InputDataFields.groundtruth_classes: input_classes,
fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
fields.InputDataFields.groundtruth_area: input_area,
fields.InputDataFields.groundtruth_difficult: input_difficult,
fields.InputDataFields.groundtruth_label_types: input_label_types
}
output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
image_tensor = np.random.rand(224, 224, 3)
feed_dict = {
input_image: image_tensor,
input_boxes:
np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
input_classes:
np.array([1, 2], dtype=np.int32),
input_is_crowd:
np.array([False, True], dtype=np.bool),
input_area:
np.array([32, 48], dtype=np.float32),
input_difficult:
np.array([True, False], dtype=np.bool),
input_label_types:
np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.string_),
valid_indices:
np.array([0], dtype=np.int32)
}
expected_tensors = {
fields.InputDataFields.image:
image_tensor,
fields.InputDataFields.groundtruth_boxes:
[[0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[1],
fields.InputDataFields.groundtruth_is_crowd:
[False],
fields.InputDataFields.groundtruth_area:
[32],
fields.InputDataFields.groundtruth_difficult:
[True],
fields.InputDataFields.groundtruth_label_types:
['APPROPRIATE']
}
with self.test_session() as sess:
output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
for key in [fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_area]:
self.assertAllClose(expected_tensors[key], output_tensors[key])
for key in [fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_label_types]:
self.assertAllEqual(expected_tensors[key], output_tensors[key])
def test_filter_with_missing_fields(self):
input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
input_classes = tf.placeholder(tf.int32, shape=(None,))
input_tensors = {
fields.InputDataFields.groundtruth_boxes: input_boxes,
fields.InputDataFields.groundtruth_classes: input_classes
}
valid_indices = tf.placeholder(tf.int32, shape=(None,))
feed_dict = {
input_boxes:
np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
input_classes:
np.array([1, 2], dtype=np.int32),
valid_indices:
np.array([0], dtype=np.int32)
}
expected_tensors = {
fields.InputDataFields.groundtruth_boxes:
[[0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[1]
}
output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
with self.test_session() as sess:
output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
for key in [fields.InputDataFields.groundtruth_boxes]:
self.assertAllClose(expected_tensors[key], output_tensors[key])
for key in [fields.InputDataFields.groundtruth_classes]:
self.assertAllEqual(expected_tensors[key], output_tensors[key])
def test_filter_with_empty_fields(self):
input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
input_classes = tf.placeholder(tf.int32, shape=(None,))
input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
input_area = tf.placeholder(tf.float32, shape=(None,))
input_difficult = tf.placeholder(tf.float32, shape=(None,))
valid_indices = tf.placeholder(tf.int32, shape=(None,))
input_tensors = {
fields.InputDataFields.groundtruth_boxes: input_boxes,
fields.InputDataFields.groundtruth_classes: input_classes,
fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
fields.InputDataFields.groundtruth_area: input_area,
fields.InputDataFields.groundtruth_difficult: input_difficult
}
output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
feed_dict = {
input_boxes:
np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
input_classes:
np.array([1, 2], dtype=np.int32),
input_is_crowd:
np.array([False, True], dtype=np.bool),
input_area:
np.array([], dtype=np.float32),
input_difficult:
np.array([], dtype=np.float32),
valid_indices:
np.array([0], dtype=np.int32)
}
expected_tensors = {
fields.InputDataFields.groundtruth_boxes:
[[0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[1],
fields.InputDataFields.groundtruth_is_crowd:
[False],
fields.InputDataFields.groundtruth_area:
[],
fields.InputDataFields.groundtruth_difficult:
[]
}
with self.test_session() as sess:
output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
for key in [fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_area]:
self.assertAllClose(expected_tensors[key], output_tensors[key])
for key in [fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_is_crowd]:
self.assertAllEqual(expected_tensors[key], output_tensors[key])
def test_filter_with_empty_groundtruth_boxes(self):
input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
input_classes = tf.placeholder(tf.int32, shape=(None,))
input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
input_area = tf.placeholder(tf.float32, shape=(None,))
input_difficult = tf.placeholder(tf.float32, shape=(None,))
valid_indices = tf.placeholder(tf.int32, shape=(None,))
input_tensors = {
fields.InputDataFields.groundtruth_boxes: input_boxes,
fields.InputDataFields.groundtruth_classes: input_classes,
fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
fields.InputDataFields.groundtruth_area: input_area,
fields.InputDataFields.groundtruth_difficult: input_difficult
}
output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
feed_dict = {
input_boxes:
np.array([], dtype=np.float).reshape(0, 4),
input_classes:
np.array([], dtype=np.int32),
input_is_crowd:
np.array([], dtype=np.bool),
input_area:
np.array([], dtype=np.float32),
input_difficult:
np.array([], dtype=np.float32),
valid_indices:
np.array([], dtype=np.int32)
}
with self.test_session() as sess:
output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
for key in input_tensors:
if key == fields.InputDataFields.groundtruth_boxes:
self.assertAllEqual([0, 4], output_tensors[key].shape)
else:
self.assertAllEqual([0], output_tensors[key].shape)
class RetainGroundTruthWithPositiveClasses(tf.test.TestCase):
def test_filter_groundtruth_with_positive_classes(self):
input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
input_classes = tf.placeholder(tf.int32, shape=(None,))
input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
input_area = tf.placeholder(tf.float32, shape=(None,))
input_difficult = tf.placeholder(tf.float32, shape=(None,))
input_label_types = tf.placeholder(tf.string, shape=(None,))
valid_indices = tf.placeholder(tf.int32, shape=(None,))
input_tensors = {
fields.InputDataFields.image: input_image,
fields.InputDataFields.groundtruth_boxes: input_boxes,
fields.InputDataFields.groundtruth_classes: input_classes,
fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
fields.InputDataFields.groundtruth_area: input_area,
fields.InputDataFields.groundtruth_difficult: input_difficult,
fields.InputDataFields.groundtruth_label_types: input_label_types
}
output_tensors = ops.retain_groundtruth_with_positive_classes(input_tensors)
image_tensor = np.random.rand(224, 224, 3)
feed_dict = {
input_image: image_tensor,
input_boxes:
np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
input_classes:
np.array([1, 0], dtype=np.int32),
input_is_crowd:
np.array([False, True], dtype=np.bool),
input_area:
np.array([32, 48], dtype=np.float32),
input_difficult:
np.array([True, False], dtype=np.bool),
input_label_types:
np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.string_),
valid_indices:
np.array([0], dtype=np.int32)
}
expected_tensors = {
fields.InputDataFields.image:
image_tensor,
fields.InputDataFields.groundtruth_boxes:
[[0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[1],
fields.InputDataFields.groundtruth_is_crowd:
[False],
fields.InputDataFields.groundtruth_area:
[32],
fields.InputDataFields.groundtruth_difficult:
[True],
fields.InputDataFields.groundtruth_label_types:
['APPROPRIATE']
}
with self.test_session() as sess:
output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
for key in [fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_area]:
self.assertAllClose(expected_tensors[key], output_tensors[key])
for key in [fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_label_types]:
self.assertAllEqual(expected_tensors[key], output_tensors[key])
class GroundtruthFilterWithNanBoxTest(tf.test.TestCase):
def test_filter_groundtruth_with_nan_box_coordinates(self):
input_tensors = {
fields.InputDataFields.groundtruth_boxes:
[[np.nan, np.nan, np.nan, np.nan], [0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[1, 2],
fields.InputDataFields.groundtruth_is_crowd:
[False, True],
fields.InputDataFields.groundtruth_area:
[100.0, 238.7]
}
expected_tensors = {
fields.InputDataFields.groundtruth_boxes:
[[0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[2],
fields.InputDataFields.groundtruth_is_crowd:
[True],
fields.InputDataFields.groundtruth_area:
[238.7]
}
output_tensors = ops.filter_groundtruth_with_nan_box_coordinates(
input_tensors)
with self.test_session() as sess:
output_tensors = sess.run(output_tensors)
for key in [fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_area]:
self.assertAllClose(expected_tensors[key], output_tensors[key])
for key in [fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_is_crowd]:
self.assertAllEqual(expected_tensors[key], output_tensors[key])
class OpsTestNormalizeToTarget(tf.test.TestCase):
def test_create_normalize_to_target(self):
inputs = tf.random_uniform([5, 10, 12, 3])
target_norm_value = 4.0
dim = 3
with self.test_session():
output = ops.normalize_to_target(inputs, target_norm_value, dim)
self.assertEqual(output.op.name, 'NormalizeToTarget/mul')
var_name = tf.contrib.framework.get_variables()[0].name
self.assertEqual(var_name, 'NormalizeToTarget/weights:0')
def test_invalid_dim(self):
inputs = tf.random_uniform([5, 10, 12, 3])
target_norm_value = 4.0
dim = 10
with self.assertRaisesRegexp(
ValueError,
'dim must be non-negative but smaller than the input rank.'):
ops.normalize_to_target(inputs, target_norm_value, dim)
def test_invalid_target_norm_values(self):
inputs = tf.random_uniform([5, 10, 12, 3])
target_norm_value = [4.0, 4.0]
dim = 3
with self.assertRaisesRegexp(
ValueError, 'target_norm_value must be a float or a list of floats'):
ops.normalize_to_target(inputs, target_norm_value, dim)
def test_correct_output_shape(self):
inputs = tf.random_uniform([5, 10, 12, 3])
target_norm_value = 4.0
dim = 3
with self.test_session():
output = ops.normalize_to_target(inputs, target_norm_value, dim)
self.assertEqual(output.get_shape().as_list(),
inputs.get_shape().as_list())
def test_correct_initial_output_values(self):
inputs = tf.constant([[[[3, 4], [7, 24]],
[[5, -12], [-1, 0]]]], tf.float32)
target_norm_value = 10.0
dim = 3
expected_output = [[[[30/5.0, 40/5.0], [70/25.0, 240/25.0]],
[[50/13.0, -120/13.0], [-10, 0]]]]
with self.test_session() as sess:
normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
dim)
sess.run(tf.global_variables_initializer())
output = normalized_inputs.eval()
self.assertAllClose(output, expected_output)
def test_multiple_target_norm_values(self):
inputs = tf.constant([[[[3, 4], [7, 24]],
[[5, -12], [-1, 0]]]], tf.float32)
target_norm_value = [10.0, 20.0]
dim = 3
expected_output = [[[[30/5.0, 80/5.0], [70/25.0, 480/25.0]],
[[50/13.0, -240/13.0], [-10, 0]]]]
with self.test_session() as sess:
normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
dim)
sess.run(tf.global_variables_initializer())
output = normalized_inputs.eval()
self.assertAllClose(output, expected_output)
class OpsTestPositionSensitiveCropRegions(tf.test.TestCase):
def test_position_sensitive(self):
num_spatial_bins = [3, 2]
image_shape = [1, 3, 2, 6]
# First channel is 1's, second channel is 2's, etc.
image = tf.constant(range(1, 3 * 2 + 1) * 6, dtype=tf.float32,
shape=image_shape)
boxes = tf.random_uniform((2, 4))
box_ind = tf.constant([0, 0], dtype=tf.int32)
# The result for both boxes should be [[1, 2], [3, 4], [5, 6]]
# before averaging.
expected_output = np.array([3.5, 3.5]).reshape([2, 1, 1, 1])
for crop_size_mult in range(1, 3):
crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
ps_crop_and_pool = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
with self.test_session() as sess:
output = sess.run(ps_crop_and_pool)
self.assertAllClose(output, expected_output)
def test_position_sensitive_with_equal_channels(self):
num_spatial_bins = [2, 2]
image_shape = [1, 3, 3, 4]
crop_size = [2, 2]
image = tf.constant(range(1, 3 * 3 + 1), dtype=tf.float32,
shape=[1, 3, 3, 1])
tiled_image = tf.tile(image, [1, 1, 1, image_shape[3]])
boxes = tf.random_uniform((3, 4))
box_ind = tf.constant([0, 0, 0], dtype=tf.int32)
# All channels are equal so position-sensitive crop and resize should
# work as the usual crop and resize for just one channel.
crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)
ps_crop_and_pool = ops.position_sensitive_crop_regions(
tiled_image,
boxes,
box_ind,
crop_size,
num_spatial_bins,
global_pool=True)
with self.test_session() as sess:
expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
self.assertAllClose(output, expected_output)
def test_position_sensitive_with_single_bin(self):
num_spatial_bins = [1, 1]
image_shape = [2, 3, 3, 4]
crop_size = [2, 2]
image = tf.random_uniform(image_shape)
boxes = tf.random_uniform((6, 4))
box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)
# When a single bin is used, position-sensitive crop and pool should be
# the same as non-position sensitive crop and pool.
crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)
ps_crop_and_pool = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
with self.test_session() as sess:
expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
self.assertAllClose(output, expected_output)
def test_raise_value_error_on_num_bins_less_than_one(self):
num_spatial_bins = [1, -1]
image_shape = [1, 1, 1, 2]
crop_size = [2, 2]
image = tf.constant(1, dtype=tf.float32, shape=image_shape)
boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
box_ind = tf.constant([0], dtype=tf.int32)
with self.assertRaisesRegexp(ValueError, 'num_spatial_bins should be >= 1'):
ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
def test_raise_value_error_on_non_divisible_crop_size(self):
num_spatial_bins = [2, 3]
image_shape = [1, 1, 1, 6]
crop_size = [3, 2]
image = tf.constant(1, dtype=tf.float32, shape=image_shape)
boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
box_ind = tf.constant([0], dtype=tf.int32)
with self.assertRaisesRegexp(
ValueError, 'crop_size should be divisible by num_spatial_bins'):
ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
def test_raise_value_error_on_non_divisible_num_channels(self):
num_spatial_bins = [2, 2]
image_shape = [1, 1, 1, 5]
crop_size = [2, 2]
image = tf.constant(1, dtype=tf.float32, shape=image_shape)
boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
box_ind = tf.constant([0], dtype=tf.int32)
with self.assertRaisesRegexp(
ValueError, 'Dimension size must be evenly divisible by 4 but is 5'):
ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
def test_position_sensitive_with_global_pool_false(self):
num_spatial_bins = [3, 2]
image_shape = [1, 3, 2, 6]
num_boxes = 2
# First channel is 1's, second channel is 2's, etc.
image = tf.constant(range(1, 3 * 2 + 1) * 6, dtype=tf.float32,
shape=image_shape)
boxes = tf.random_uniform((num_boxes, 4))
box_ind = tf.constant([0, 0], dtype=tf.int32)
expected_output = []
# Expected output, when crop_size = [3, 2].
expected_output.append(np.expand_dims(
np.tile(np.array([[1, 2],
[3, 4],
[5, 6]]), (num_boxes, 1, 1)),
axis=-1))
# Expected output, when crop_size = [6, 4].
expected_output.append(np.expand_dims(
np.tile(np.array([[1, 1, 2, 2],
[1, 1, 2, 2],
[3, 3, 4, 4],
[3, 3, 4, 4],
[5, 5, 6, 6],
[5, 5, 6, 6]]), (num_boxes, 1, 1)),
axis=-1))
for crop_size_mult in range(1, 3):
crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
ps_crop = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
with self.test_session() as sess:
output = sess.run(ps_crop)
self.assertAllEqual(output, expected_output[crop_size_mult - 1])
def test_position_sensitive_with_global_pool_false_and_known_boxes(self):
num_spatial_bins = [2, 2]
image_shape = [2, 2, 2, 4]
crop_size = [2, 2]
    image = tf.constant(list(range(1, 2 * 2 * 4 + 1)) * 2, dtype=tf.float32,
                        shape=image_shape)
# First box contains whole image, and second box contains only first row.
boxes = tf.constant(np.array([[0., 0., 1., 1.],
[0., 0., 0.5, 1.]]), dtype=tf.float32)
box_ind = tf.constant([0, 1], dtype=tf.int32)
expected_output = []
    # Expected output for the box containing the whole image.
expected_output.append(
np.reshape(np.array([[4, 7],
[10, 13]]),
(1, 2, 2, 1))
)
    # Expected output for the box containing only the first row.
expected_output.append(
np.reshape(np.array([[3, 6],
[7, 10]]),
(1, 2, 2, 1))
)
expected_output = np.concatenate(expected_output, axis=0)
ps_crop = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
with self.test_session() as sess:
output = sess.run(ps_crop)
self.assertAllEqual(output, expected_output)
def test_position_sensitive_with_global_pool_false_and_single_bin(self):
num_spatial_bins = [1, 1]
image_shape = [2, 3, 3, 4]
crop_size = [1, 1]
image = tf.random_uniform(image_shape)
boxes = tf.random_uniform((6, 4))
box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)
    # Since a single bin is used and crop_size = [1, 1] (i.e., no crop resize),
    # the outputs are the same regardless of the global_pool value.
ps_crop_and_pool = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
ps_crop = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
with self.test_session() as sess:
pooled_output, unpooled_output = sess.run((ps_crop_and_pool, ps_crop))
self.assertAllClose(pooled_output, unpooled_output)
def test_position_sensitive_with_global_pool_false_and_do_global_pool(self):
num_spatial_bins = [3, 2]
image_shape = [1, 3, 2, 6]
num_boxes = 2
# First channel is 1's, second channel is 2's, etc.
    image = tf.constant(list(range(1, 3 * 2 + 1)) * 6, dtype=tf.float32,
                        shape=image_shape)
boxes = tf.random_uniform((num_boxes, 4))
box_ind = tf.constant([0, 0], dtype=tf.int32)
expected_output = []
# Expected output, when crop_size = [3, 2].
expected_output.append(np.mean(
np.expand_dims(
np.tile(np.array([[1, 2],
[3, 4],
[5, 6]]), (num_boxes, 1, 1)),
axis=-1),
axis=(1, 2), keepdims=True))
# Expected output, when crop_size = [6, 4].
expected_output.append(np.mean(
np.expand_dims(
np.tile(np.array([[1, 1, 2, 2],
[1, 1, 2, 2],
[3, 3, 4, 4],
[3, 3, 4, 4],
[5, 5, 6, 6],
[5, 5, 6, 6]]), (num_boxes, 1, 1)),
axis=-1),
axis=(1, 2), keepdims=True))
for crop_size_mult in range(1, 3):
crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
# Perform global_pooling after running the function with
# global_pool=False.
ps_crop = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
ps_crop_and_pool = tf.reduce_mean(
ps_crop, reduction_indices=(1, 2), keep_dims=True)
with self.test_session() as sess:
output = sess.run(ps_crop_and_pool)
self.assertAllEqual(output, expected_output[crop_size_mult - 1])
def test_raise_value_error_on_non_square_block_size(self):
num_spatial_bins = [3, 2]
image_shape = [1, 3, 2, 6]
crop_size = [6, 2]
image = tf.constant(1, dtype=tf.float32, shape=image_shape)
boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
box_ind = tf.constant([0], dtype=tf.int32)
with self.assertRaisesRegexp(
ValueError, 'Only support square bin crop size for now.'):
ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
class ReframeBoxMasksToImageMasksTest(tf.test.TestCase):
def testZeroImageOnEmptyMask(self):
box_masks = tf.constant([[[0, 0],
[0, 0]]], dtype=tf.float32)
boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32)
image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
image_height=4,
image_width=4)
np_expected_image_masks = np.array([[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]], dtype=np.float32)
with self.test_session() as sess:
np_image_masks = sess.run(image_masks)
self.assertAllClose(np_image_masks, np_expected_image_masks)
def testMaskIsCenteredInImageWhenBoxIsCentered(self):
box_masks = tf.constant([[[1, 1],
[1, 1]]], dtype=tf.float32)
boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
image_height=4,
image_width=4)
np_expected_image_masks = np.array([[[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 1, 0],
[0, 0, 0, 0]]], dtype=np.float32)
with self.test_session() as sess:
np_image_masks = sess.run(image_masks)
self.assertAllClose(np_image_masks, np_expected_image_masks)
def testMaskOffCenterRemainsOffCenterInImage(self):
box_masks = tf.constant([[[1, 0],
[0, 1]]], dtype=tf.float32)
boxes = tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
image_height=4,
image_width=4)
np_expected_image_masks = np.array([[[0, 0, 0, 0],
[0, 0, 0.6111111, 0.16666669],
[0, 0, 0.3888889, 0.83333337],
[0, 0, 0, 0]]], dtype=np.float32)
with self.test_session() as sess:
np_image_masks = sess.run(image_masks)
self.assertAllClose(np_image_masks, np_expected_image_masks)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluate Object Detection result on a single image.
Annotate each detected result as true positives or false positive according to
a predefined IOU ratio. Non Maximum Supression is used by default. Multi class
detection is supported by default.
"""
import numpy as np
from object_detection.utils import np_box_list
from object_detection.utils import np_box_list_ops
class PerImageEvaluation(object):
"""Evaluate detection result of a single image."""
def __init__(self,
num_groundtruth_classes,
matching_iou_threshold=0.5,
nms_iou_threshold=0.3,
nms_max_output_boxes=50):
"""Initialized PerImageEvaluation by evaluation parameters.
Args:
num_groundtruth_classes: Number of ground truth object classes
matching_iou_threshold: A ratio of area intersection to union, which is
the threshold to consider whether a detection is true positive or not
nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
nms_max_output_boxes: Number of maximum output boxes in NMS.
"""
self.matching_iou_threshold = matching_iou_threshold
self.nms_iou_threshold = nms_iou_threshold
self.nms_max_output_boxes = nms_max_output_boxes
self.num_groundtruth_classes = num_groundtruth_classes
def compute_object_detection_metrics(self, detected_boxes, detected_scores,
detected_class_labels, groundtruth_boxes,
groundtruth_class_labels,
groundtruth_is_difficult_lists):
"""Compute Object Detection related metrics from a single image.
Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
groundtruth_boxes: A float numpy array of shape [M, 4], representing M
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
Returns:
scores: A list of C float numpy arrays. Each numpy array is of
shape [K, 1], representing K scores detected with object class
label c
tp_fp_labels: A list of C boolean numpy arrays. Each numpy array
is of shape [K, 1], representing K True/False positive label of
object instances detected with class label c
      is_class_correctly_detected_in_image: a numpy integer array of
        shape [C, 1], indicating whether the corresponding class has at least
        one instance correctly detected in the image
"""
detected_boxes, detected_scores, detected_class_labels = (
self._remove_invalid_boxes(detected_boxes, detected_scores,
detected_class_labels))
scores, tp_fp_labels = self._compute_tp_fp(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_lists)
is_class_correctly_detected_in_image = self._compute_cor_loc(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels)
return scores, tp_fp_labels, is_class_correctly_detected_in_image
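  # Illustrative usage sketch (not part of the original module); the inputs
  # below are made up for demonstration and assume two classes:
  #
  #   evaluator = PerImageEvaluation(num_groundtruth_classes=2)
  #   scores, tp_fp_labels, corloc = evaluator.compute_object_detection_metrics(
  #       detected_boxes=np.array([[0., 0., 1., 1.]], dtype=float),
  #       detected_scores=np.array([0.9], dtype=float),
  #       detected_class_labels=np.array([0], dtype=int),
  #       groundtruth_boxes=np.array([[0., 0., 1., 1.]], dtype=float),
  #       groundtruth_class_labels=np.array([0], dtype=int),
  #       groundtruth_is_difficult_lists=np.zeros(1, dtype=bool))
  #   # scores[0] == [0.9], tp_fp_labels[0] == [True], corloc == [1, 0].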
def _compute_cor_loc(self, detected_boxes, detected_scores,
detected_class_labels, groundtruth_boxes,
groundtruth_class_labels):
"""Compute CorLoc score for object detection result.
Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
groundtruth_boxes: A float numpy array of shape [M, 4], representing M
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
Returns:
      is_class_correctly_detected_in_image: a numpy integer array of
        shape [C, 1], indicating whether the corresponding class has at least
        one instance correctly detected in the image
"""
is_class_correctly_detected_in_image = np.zeros(
self.num_groundtruth_classes, dtype=int)
for i in range(self.num_groundtruth_classes):
gt_boxes_at_ith_class = groundtruth_boxes[
groundtruth_class_labels == i, :]
detected_boxes_at_ith_class = detected_boxes[
detected_class_labels == i, :]
detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
is_class_correctly_detected_in_image[i] = (
          self._compute_is_a_class_correctly_detected_in_image(
detected_boxes_at_ith_class, detected_scores_at_ith_class,
gt_boxes_at_ith_class))
return is_class_correctly_detected_in_image
  def _compute_is_a_class_correctly_detected_in_image(
self, detected_boxes, detected_scores, groundtruth_boxes):
"""Compute CorLoc score for a single class.
Args:
detected_boxes: A numpy array of shape [N, 4] representing detected box
coordinates
detected_scores: A 1-d numpy array of length N representing classification
score
groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
box coordinates
Returns:
is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether a
class is correctly detected in the image or not
"""
if detected_boxes.size > 0:
if groundtruth_boxes.size > 0:
max_score_id = np.argmax(detected_scores)
detected_boxlist = np_box_list.BoxList(
np.expand_dims(detected_boxes[max_score_id, :], axis=0))
gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
if np.max(iou) >= self.matching_iou_threshold:
return 1
return 0
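  # Illustrative sketch (hypothetical values): given an instance
  # `evaluator = PerImageEvaluation(num_groundtruth_classes=1)`,
  #
  #   evaluator._compute_is_a_class_correctly_detected_in_image(
  #       detected_boxes=np.array([[0., 0., 1., 1.], [0., 0., 2., 2.]],
  #                               dtype=float),
  #       detected_scores=np.array([0.9, 0.2], dtype=float),
  #       groundtruth_boxes=np.array([[0., 0., 1., 1.]], dtype=float))
  #
  # returns 1: only the top-scoring box ([0, 0, 1, 1], score 0.9) is compared
  # against the ground truth, and its IOU of 1.0 clears the 0.5 threshold.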
def _compute_tp_fp(self, detected_boxes, detected_scores,
detected_class_labels, groundtruth_boxes,
groundtruth_class_labels, groundtruth_is_difficult_lists):
"""Labels true/false positives of detections of an image across all classes.
Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
groundtruth_boxes: A float numpy array of shape [M, 4], representing M
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
Returns:
result_scores: A list of float numpy arrays. Each numpy array is of
shape [K, 1], representing K scores detected with object class
label c
result_tp_fp_labels: A list of boolean numpy array. Each numpy array is of
shape [K, 1], representing K True/False positive label of object
instances detected with class label c
"""
result_scores = []
result_tp_fp_labels = []
for i in range(self.num_groundtruth_classes):
      gt_boxes_at_ith_class = groundtruth_boxes[
          groundtruth_class_labels == i, :]
      groundtruth_is_difficult_list_at_ith_class = (
          groundtruth_is_difficult_lists[groundtruth_class_labels == i])
      detected_boxes_at_ith_class = detected_boxes[
          detected_class_labels == i, :]
detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
detected_boxes_at_ith_class, detected_scores_at_ith_class,
gt_boxes_at_ith_class, groundtruth_is_difficult_list_at_ith_class)
result_scores.append(scores)
result_tp_fp_labels.append(tp_fp_labels)
return result_scores, result_tp_fp_labels
def _remove_invalid_boxes(self, detected_boxes, detected_scores,
detected_class_labels):
valid_indices = np.logical_and(detected_boxes[:, 0] < detected_boxes[:, 2],
detected_boxes[:, 1] < detected_boxes[:, 3])
return (detected_boxes[valid_indices, :], detected_scores[valid_indices],
detected_class_labels[valid_indices])
def _compute_tp_fp_for_single_class(self, detected_boxes, detected_scores,
groundtruth_boxes,
groundtruth_is_difficult_list):
"""Labels boxes detected with the same class from the same image as tp/fp.
Args:
detected_boxes: A numpy array of shape [N, 4] representing detected box
coordinates
detected_scores: A 1-d numpy array of length N representing classification
score
groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
box coordinates
groundtruth_is_difficult_list: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
Returns:
scores: A numpy array representing the detection scores
tp_fp_labels: a boolean numpy array indicating whether a detection is a
true positive.
"""
if detected_boxes.size == 0:
return np.array([], dtype=float), np.array([], dtype=bool)
detected_boxlist = np_box_list.BoxList(detected_boxes)
detected_boxlist.add_field('scores', detected_scores)
detected_boxlist = np_box_list_ops.non_max_suppression(
detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
scores = detected_boxlist.get_field('scores')
if groundtruth_boxes.size == 0:
return scores, np.zeros(detected_boxlist.num_boxes(), dtype=bool)
gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
max_overlap_gt_ids = np.argmax(iou, axis=1)
is_gt_box_detected = np.zeros(gt_boxlist.num_boxes(), dtype=bool)
tp_fp_labels = np.zeros(detected_boxlist.num_boxes(), dtype=bool)
is_matched_to_difficult_box = np.zeros(
detected_boxlist.num_boxes(), dtype=bool)
for i in range(detected_boxlist.num_boxes()):
gt_id = max_overlap_gt_ids[i]
if iou[i, gt_id] >= self.matching_iou_threshold:
if not groundtruth_is_difficult_list[gt_id]:
if not is_gt_box_detected[gt_id]:
tp_fp_labels[i] = True
is_gt_box_detected[gt_id] = True
else:
is_matched_to_difficult_box[i] = True
return scores[~is_matched_to_difficult_box], tp_fp_labels[
~is_matched_to_difficult_box]
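  # Matching sketch (illustrative, made-up numbers): with one ground truth box
  # and two detections that both clear the matching IOU threshold, the
  # higher-scoring detection is labeled a true positive and the lower-scoring
  # duplicate a false positive:
  #
  #   evaluator = PerImageEvaluation(num_groundtruth_classes=1,
  #                                  matching_iou_threshold=0.5,
  #                                  nms_iou_threshold=1.0,
  #                                  nms_max_output_boxes=10)
  #   scores, tp_fp = evaluator._compute_tp_fp_for_single_class(
  #       detected_boxes=np.array([[0., 0., 1., 1.], [0., 0., 1.1, 1.1]],
  #                               dtype=float),
  #       detected_scores=np.array([0.5, 0.9], dtype=float),
  #       groundtruth_boxes=np.array([[0., 0., 1., 1.]], dtype=float),
  #       groundtruth_is_difficult_list=np.zeros(1, dtype=bool))
  #   # scores == [0.9, 0.5]; tp_fp == [True, False].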
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.per_image_evaluation."""
import numpy as np
import tensorflow as tf
from object_detection.utils import per_image_evaluation
class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
def setUp(self):
num_groundtruth_classes = 1
matching_iou_threshold = 0.5
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
self.eval = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
nms_max_output_boxes)
self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
self.groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 10, 10]],
dtype=float)
def test_match_to_not_difficult_box(self):
groundtruth_groundtruth_is_difficult_list = np.array([False, True],
dtype=bool)
scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_match_to_difficult_box(self):
groundtruth_groundtruth_is_difficult_list = np.array([True, False],
dtype=bool)
scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
def setUp(self):
num_groundtruth_classes = 1
matching_iou_threshold1 = 0.5
matching_iou_threshold2 = 0.1
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
self.eval1 = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold1, nms_iou_threshold,
nms_max_output_boxes)
self.eval2 = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold2, nms_iou_threshold,
nms_max_output_boxes)
self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
def test_no_true_positives(self):
groundtruth_boxes = np.array([[100, 100, 105, 105]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, False, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
  def test_one_true_positive_with_large_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
  def test_one_true_positive_with_very_small_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
scores, tp_fp_labels = self.eval2._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([True, False, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_two_true_positives_with_large_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, True, True], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
class MultiClassesTpFpTest(tf.test.TestCase):
def test_tp_fp(self):
num_groundtruth_classes = 3
matching_iou_threshold = 0.5
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
matching_iou_threshold,
nms_iou_threshold,
nms_max_output_boxes)
detected_boxes = np.array([[0, 0, 1, 1], [10, 10, 5, 5], [0, 0, 2, 2],
[5, 10, 10, 5], [10, 5, 5, 10], [0, 0, 3, 3]],
dtype=float)
detected_scores = np.array([0.8, 0.1, 0.8, 0.9, 0.7, 0.8], dtype=float)
detected_class_labels = np.array([0, 1, 1, 2, 0, 2], dtype=int)
groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]], dtype=float)
groundtruth_class_labels = np.array([0, 2], dtype=int)
    groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
scores, tp_fp_labels, _ = eval1.compute_object_detection_metrics(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_groundtruth_is_difficult_list)
expected_scores = [np.array([0.8], dtype=float)] * 3
    expected_tp_fp_labels = [np.array([True]), np.array([False]),
                             np.array([True])]
for i in range(len(expected_scores)):
self.assertTrue(np.allclose(expected_scores[i], scores[i]))
self.assertTrue(np.array_equal(expected_tp_fp_labels[i], tp_fp_labels[i]))
class CorLocTest(tf.test.TestCase):
def test_compute_corloc_with_normal_iou_threshold(self):
num_groundtruth_classes = 3
matching_iou_threshold = 0.5
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
matching_iou_threshold,
nms_iou_threshold,
nms_max_output_boxes)
detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3],
[0, 0, 5, 5]], dtype=float)
detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float)
detected_class_labels = np.array([0, 1, 0, 2], dtype=int)
groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]],
dtype=float)
groundtruth_class_labels = np.array([0, 0, 2], dtype=int)
is_class_correctly_detected_in_image = eval1._compute_cor_loc(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels)
expected_result = np.array([1, 0, 1], dtype=int)
self.assertTrue(np.array_equal(expected_result,
is_class_correctly_detected_in_image))
def test_compute_corloc_with_very_large_iou_threshold(self):
num_groundtruth_classes = 3
matching_iou_threshold = 0.9
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
matching_iou_threshold,
nms_iou_threshold,
nms_max_output_boxes)
detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3],
[0, 0, 5, 5]], dtype=float)
detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float)
detected_class_labels = np.array([0, 1, 0, 2], dtype=int)
groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]],
dtype=float)
groundtruth_class_labels = np.array([0, 0, 2], dtype=int)
is_class_correctly_detected_in_image = eval1._compute_cor_loc(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels)
expected_result = np.array([1, 0, 0], dtype=int)
self.assertTrue(np.array_equal(expected_result,
is_class_correctly_detected_in_image))
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils used to manipulate tensor shapes."""
import tensorflow as tf
def _is_tensor(t):
"""Returns a boolean indicating whether the input is a tensor.
Args:
t: the input to be tested.
Returns:
a boolean that indicates whether t is a tensor.
"""
return isinstance(t, (tf.Tensor, tf.SparseTensor, tf.Variable))
def _set_dim_0(t, d0):
"""Sets the 0-th dimension of the input tensor.
Args:
t: the input tensor, assuming the rank is at least 1.
d0: an integer indicating the 0-th dimension of the input tensor.
Returns:
the tensor t with the 0-th dimension set.
"""
t_shape = t.get_shape().as_list()
t_shape[0] = d0
t.set_shape(t_shape)
return t
def pad_tensor(t, length):
"""Pads the input tensor with 0s along the first dimension up to the length.
Args:
t: the input tensor, assuming the rank is at least 1.
    length: a tensor of shape [1] or an integer, indicating the first dimension
      of the input tensor t after padding, assuming length >= t.shape[0].
Returns:
padded_t: the padded tensor, whose first dimension is length. If the length
is an integer, the first dimension of padded_t is set to length
statically.
"""
t_rank = tf.rank(t)
t_shape = tf.shape(t)
t_d0 = t_shape[0]
pad_d0 = tf.expand_dims(length - t_d0, 0)
pad_shape = tf.cond(
tf.greater(t_rank, 1), lambda: tf.concat([pad_d0, t_shape[1:]], 0),
lambda: tf.expand_dims(length - t_d0, 0))
padded_t = tf.concat([t, tf.zeros(pad_shape, dtype=t.dtype)], 0)
if not _is_tensor(length):
padded_t = _set_dim_0(padded_t, length)
return padded_t
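# Illustrative usage (a sketch, assuming a TF 1.x session):
#
#   t = tf.constant([[0.1, 0.2]])     # shape [1, 2]
#   padded = pad_tensor(t, 3)         # static shape [3, 2]
#   with tf.Session() as sess:
#     sess.run(padded)                # [[0.1, 0.2], [0., 0.], [0., 0.]]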
def clip_tensor(t, length):
"""Clips the input tensor along the first dimension up to the length.
Args:
t: the input tensor, assuming the rank is at least 1.
length: a tensor of shape [1] or an integer, indicating the first dimension
of the input tensor t after clipping, assuming length <= t.shape[0].
Returns:
clipped_t: the clipped tensor, whose first dimension is length. If the
length is an integer, the first dimension of clipped_t is set to length
statically.
"""
clipped_t = tf.gather(t, tf.range(length))
if not _is_tensor(length):
clipped_t = _set_dim_0(clipped_t, length)
return clipped_t
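# Illustrative usage (a sketch): clip_tensor(tf.constant([1, 2, 3]), 2)
# evaluates to [1, 2], and because the length is a Python integer the static
# first dimension of the result is set to 2.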
def pad_or_clip_tensor(t, length):
"""Pad or clip the input tensor along the first dimension.
Args:
t: the input tensor, assuming the rank is at least 1.
length: a tensor of shape [1] or an integer, indicating the first dimension
of the input tensor t after processing.
Returns:
processed_t: the processed tensor, whose first dimension is length. If the
length is an integer, the first dimension of the processed tensor is set
to length statically.
"""
processed_t = tf.cond(
tf.greater(tf.shape(t)[0], length),
lambda: clip_tensor(t, length),
lambda: pad_tensor(t, length))
if not _is_tensor(length):
processed_t = _set_dim_0(processed_t, length)
return processed_t
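# Illustrative usage (a sketch): pad_or_clip_tensor dispatches on the runtime
# first dimension, so the same call pads short tensors and clips long ones:
#
#   short = pad_or_clip_tensor(tf.constant([1]), 2)        # -> [1, 0]
#   long = pad_or_clip_tensor(tf.constant([1, 2, 3]), 2)   # -> [1, 2]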
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.shape_utils."""
import tensorflow as tf
from object_detection.utils import shape_utils
class UtilTest(tf.test.TestCase):
def test_pad_tensor_using_integer_input(self):
t1 = tf.constant([1], dtype=tf.int32)
pad_t1 = shape_utils.pad_tensor(t1, 2)
t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
pad_t2 = shape_utils.pad_tensor(t2, 2)
self.assertEqual(2, pad_t1.get_shape()[0])
self.assertEqual(2, pad_t2.get_shape()[0])
with self.test_session() as sess:
pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2])
self.assertAllEqual([1, 0], pad_t1_result)
self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result)
def test_pad_tensor_using_tensor_input(self):
t1 = tf.constant([1], dtype=tf.int32)
pad_t1 = shape_utils.pad_tensor(t1, tf.constant(2))
t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
pad_t2 = shape_utils.pad_tensor(t2, tf.constant(2))
with self.test_session() as sess:
pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2])
self.assertAllEqual([1, 0], pad_t1_result)
self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result)
def test_clip_tensor_using_integer_input(self):
t1 = tf.constant([1, 2, 3], dtype=tf.int32)
clip_t1 = shape_utils.clip_tensor(t1, 2)
t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
clip_t2 = shape_utils.clip_tensor(t2, 2)
self.assertEqual(2, clip_t1.get_shape()[0])
self.assertEqual(2, clip_t2.get_shape()[0])
with self.test_session() as sess:
clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2])
self.assertAllEqual([1, 2], clip_t1_result)
self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result)
def test_clip_tensor_using_tensor_input(self):
t1 = tf.constant([1, 2, 3], dtype=tf.int32)
clip_t1 = shape_utils.clip_tensor(t1, tf.constant(2))
t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
clip_t2 = shape_utils.clip_tensor(t2, tf.constant(2))
with self.test_session() as sess:
clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2])
self.assertAllEqual([1, 2], clip_t1_result)
self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result)
def test_pad_or_clip_tensor_using_integer_input(self):
t1 = tf.constant([1], dtype=tf.int32)
tt1 = shape_utils.pad_or_clip_tensor(t1, 2)
t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
tt2 = shape_utils.pad_or_clip_tensor(t2, 2)
t3 = tf.constant([1, 2, 3], dtype=tf.int32)
tt3 = shape_utils.clip_tensor(t3, 2)
t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
tt4 = shape_utils.clip_tensor(t4, 2)
self.assertEqual(2, tt1.get_shape()[0])
self.assertEqual(2, tt2.get_shape()[0])
self.assertEqual(2, tt3.get_shape()[0])
self.assertEqual(2, tt4.get_shape()[0])
with self.test_session() as sess:
tt1_result, tt2_result, tt3_result, tt4_result = sess.run(
[tt1, tt2, tt3, tt4])
self.assertAllEqual([1, 0], tt1_result)
self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result)
self.assertAllEqual([1, 2], tt3_result)
self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result)
def test_pad_or_clip_tensor_using_tensor_input(self):
t1 = tf.constant([1], dtype=tf.int32)
tt1 = shape_utils.pad_or_clip_tensor(t1, tf.constant(2))
t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
tt2 = shape_utils.pad_or_clip_tensor(t2, tf.constant(2))
t3 = tf.constant([1, 2, 3], dtype=tf.int32)
tt3 = shape_utils.clip_tensor(t3, tf.constant(2))
t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
tt4 = shape_utils.clip_tensor(t4, tf.constant(2))
with self.test_session() as sess:
tt1_result, tt2_result, tt3_result, tt4_result = sess.run(
[tt1, tt2, tt3, tt4])
self.assertAllEqual([1, 0], tt1_result)
self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result)
self.assertAllEqual([1, 2], tt3_result)
self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions to access TensorShape values.
The rank 4 tensor_shape must be of the form [batch_size, height, width, depth].
"""
def get_batch_size(tensor_shape):
"""Returns batch size from the tensor shape.
Args:
tensor_shape: A rank 4 TensorShape.
Returns:
An integer representing the batch size of the tensor.
"""
tensor_shape.assert_has_rank(rank=4)
return tensor_shape[0].value
def get_height(tensor_shape):
"""Returns height from the tensor shape.
Args:
tensor_shape: A rank 4 TensorShape.
Returns:
An integer representing the height of the tensor.
"""
tensor_shape.assert_has_rank(rank=4)
return tensor_shape[1].value
def get_width(tensor_shape):
"""Returns width from the tensor shape.
Args:
tensor_shape: A rank 4 TensorShape.
Returns:
An integer representing the width of the tensor.
"""
tensor_shape.assert_has_rank(rank=4)
return tensor_shape[2].value
def get_depth(tensor_shape):
"""Returns depth from the tensor shape.
Args:
tensor_shape: A rank 4 TensorShape.
Returns:
An integer representing the depth of the tensor.
"""
tensor_shape.assert_has_rank(rank=4)
return tensor_shape[3].value
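# Illustrative usage (a sketch; the placeholder below is made up and assumes
# TF 1.x static shapes):
#
#   images = tf.placeholder(tf.float32, shape=[8, 300, 300, 3])
#   get_batch_size(images.get_shape())   # 8
#   get_height(images.get_shape())       # 300
#   get_width(images.get_shape())        # 300
#   get_depth(images.get_shape())        # 3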
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.static_shape."""
import tensorflow as tf
from object_detection.utils import static_shape
class StaticShapeTest(tf.test.TestCase):
  def test_return_correct_batch_size(self):
tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
self.assertEqual(32, static_shape.get_batch_size(tensor_shape))
def test_return_correct_height(self):
tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
self.assertEqual(299, static_shape.get_height(tensor_shape))
def test_return_correct_width(self):
tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
self.assertEqual(384, static_shape.get_width(tensor_shape))
def test_return_correct_depth(self):
tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
self.assertEqual(3, static_shape.get_depth(tensor_shape))
  def test_die_on_tensor_shape_with_rank_three(self):
    # Each accessor is checked in its own context; in the original version
    # only the first call could ever run, since the first raise exited the
    # assertRaises block.
    tensor_shape = tf.TensorShape(dims=[32, 299, 384])
    with self.assertRaises(ValueError):
      static_shape.get_batch_size(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_height(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_width(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_depth(tensor_shape)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions which are convenient for unit testing."""
import numpy as np
import tensorflow as tf
from object_detection.core import anchor_generator
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import box_predictor
from object_detection.core import matcher
class MockBoxCoder(box_coder.BoxCoder):
"""Simple `difference` BoxCoder."""
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
return boxes.get() - anchors.get()
def _decode(self, rel_codes, anchors):
return box_list.BoxList(rel_codes + anchors.get())
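# Illustrative sketch (assuming the public encode/decode wrappers on
# box_coder.BoxCoder delegate to _encode/_decode): encoding then decoding with
# the same anchors is the identity,
#
#   coder = MockBoxCoder()
#   rel_codes = coder.encode(boxes, anchors)    # boxes.get() - anchors.get()
#   decoded = coder.decode(rel_codes, anchors)  # BoxList equal to `boxes`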
class MockBoxPredictor(box_predictor.BoxPredictor):
"""Simple box predictor that ignores inputs and outputs all zeros."""
def __init__(self, is_training, num_classes):
super(MockBoxPredictor, self).__init__(is_training, num_classes)
def _predict(self, image_features, num_predictions_per_location):
batch_size = image_features.get_shape().as_list()[0]
num_anchors = (image_features.get_shape().as_list()[1]
* image_features.get_shape().as_list()[2])
code_size = 4
zero = tf.reduce_sum(0 * image_features)
box_encodings = zero + tf.zeros(
(batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
return {box_predictor.BOX_ENCODINGS: box_encodings,
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
class_predictions_with_background}
class MockAnchorGenerator(anchor_generator.AnchorGenerator):
"""Mock anchor generator."""
def name_scope(self):
return 'MockAnchorGenerator'
def num_anchors_per_location(self):
return [1]
def _generate(self, feature_map_shape_list):
num_anchors = sum([shape[0] * shape[1] for shape in feature_map_shape_list])
return box_list.BoxList(tf.zeros((num_anchors, 4), dtype=tf.float32))
class MockMatcher(matcher.Matcher):
"""Simple matcher that matches first anchor to first groundtruth box."""
def _match(self, similarity_matrix):
return tf.constant([0, -1, -1, -1], dtype=tf.int32)
def create_diagonal_gradient_image(height, width, depth):
"""Creates pyramid image. Useful for testing.
For example, pyramid_image(5, 6, 1) looks like:
# [[[ 5. 4. 3. 2. 1. 0.]
# [ 6. 5. 4. 3. 2. 1.]
# [ 7. 6. 5. 4. 3. 2.]
# [ 8. 7. 6. 5. 4. 3.]
# [ 9. 8. 7. 6. 5. 4.]]]
Args:
height: height of image
width: width of image
depth: depth of image
Returns:
pyramid image
"""
row = np.arange(height)
col = np.arange(width)[::-1]
image_layer = np.expand_dims(row, 1) + col
image_layer = np.expand_dims(image_layer, 2)
image = image_layer
for i in range(1, depth):
image = np.concatenate((image, image_layer * pow(10, i)), 2)
return image.astype(np.float32)
def create_random_boxes(num_boxes, max_height, max_width):
"""Creates random bounding boxes of specific maximum height and width.
Args:
num_boxes: number of boxes.
max_height: maximum height of boxes.
max_width: maximum width of boxes.
Returns:
boxes: numpy array of shape [num_boxes, 4]. Each row is in form
[y_min, x_min, y_max, x_max].
"""
y_1 = np.random.uniform(size=(1, num_boxes)) * max_height
y_2 = np.random.uniform(size=(1, num_boxes)) * max_height
x_1 = np.random.uniform(size=(1, num_boxes)) * max_width
x_2 = np.random.uniform(size=(1, num_boxes)) * max_width
boxes = np.zeros(shape=(num_boxes, 4))
boxes[:, 0] = np.minimum(y_1, y_2)
boxes[:, 1] = np.minimum(x_1, x_2)
boxes[:, 2] = np.maximum(y_1, y_2)
boxes[:, 3] = np.maximum(x_1, x_2)
return boxes.astype(np.float32)
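# Illustrative usage (a sketch): draw boxes bounded by a 3x5 canvas and check
# the coordinate ordering guaranteed by the minimum/maximum above:
#
#   boxes = create_random_boxes(4, max_height=3, max_width=5)
#   assert (boxes[:, 0] <= boxes[:, 2]).all()   # y_min <= y_max
#   assert (boxes[:, 1] <= boxes[:, 3]).all()   # x_min <= x_max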
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.test_utils."""
import numpy as np
import tensorflow as tf
from object_detection.utils import test_utils
class TestUtilsTest(tf.test.TestCase):
def test_diagonal_gradient_image(self):
"""Tests if a good pyramid image is created."""
pyramid_image = test_utils.create_diagonal_gradient_image(3, 4, 2)
# Test which is easy to understand.
expected_first_channel = np.array([[3, 2, 1, 0],
[4, 3, 2, 1],
[5, 4, 3, 2]], dtype=np.float32)
self.assertAllEqual(np.squeeze(pyramid_image[:, :, 0]),
expected_first_channel)
# Actual test.
expected_image = np.array([[[3, 30],
[2, 20],
[1, 10],
[0, 0]],
[[4, 40],
[3, 30],
[2, 20],
[1, 10]],
[[5, 50],
[4, 40],
[3, 30],
[2, 20]]], dtype=np.float32)
self.assertAllEqual(pyramid_image, expected_image)
def test_random_boxes(self):
"""Tests if valid random boxes are created."""
num_boxes = 1000
max_height = 3
max_width = 5
boxes = test_utils.create_random_boxes(num_boxes,
max_height,
max_width)
true_column = np.ones(shape=(num_boxes)) == 1
self.assertAllEqual(boxes[:, 0] < boxes[:, 2], true_column)
self.assertAllEqual(boxes[:, 1] < boxes[:, 3], true_column)
self.assertTrue(boxes[:, 0].min() >= 0)
self.assertTrue(boxes[:, 1].min() >= 0)
self.assertTrue(boxes[:, 2].max() <= max_height)
self.assertTrue(boxes[:, 3].max() <= max_width)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions for manipulating collections of variables during training.
"""
import logging
import re
import tensorflow as tf
slim = tf.contrib.slim
# TODO: Consider replacing with tf.contrib.filter_variables in
# tensorflow/contrib/framework/python/ops/variables.py
def filter_variables(variables, filter_regex_list, invert=False):
"""Filters out the variables matching the filter_regex.
  Filters out the variables whose names match any of the regular
  expressions in filter_regex_list and returns the remaining variables.
Optionally, if invert=True, the complement set is returned.
Args:
variables: a list of tensorflow variables.
filter_regex_list: a list of string regular expressions.
invert: (boolean). If True, returns the complement of the filter set; that
is, all variables matching filter_regex are kept and all others discarded.
Returns:
a list of filtered variables.
"""
kept_vars = []
  # Materialize as a list so the patterns can be re-used across variables
  # (in Python 3, `filter` returns a one-shot iterator).
  variables_to_ignore_patterns = list(filter(None, filter_regex_list))
for var in variables:
add = True
for pattern in variables_to_ignore_patterns:
if re.match(pattern, var.op.name):
add = False
break
if add != invert:
kept_vars.append(var)
return kept_vars
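# Illustrative usage (a sketch; the variable names are made up):
#
#   variables = [tf.Variable(1.0, name='FeatureExtractor/weights'),
#                tf.Variable(1.0, name='BoxPredictor/weights')]
#   filter_variables(variables, ['FeatureExtractor/.*'])
#   # -> keeps only BoxPredictor/weights; with invert=True, keeps only
#   # FeatureExtractor/weights.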
def multiply_gradients_matching_regex(grads_and_vars, regex_list, multiplier):
"""Multiply gradients whose variable names match a regular expression.
Args:
grads_and_vars: A list of gradient to variable pairs (tuples).
regex_list: A list of string regular expressions.
multiplier: A (float) multiplier to apply to each gradient matching the
regular expression.
Returns:
grads_and_vars: A list of gradient to variable pairs (tuples).
"""
variables = [pair[1] for pair in grads_and_vars]
matching_vars = filter_variables(variables, regex_list, invert=True)
for var in matching_vars:
logging.info('Applying multiplier %f to variable [%s]',
multiplier, var.op.name)
grad_multipliers = {var: float(multiplier) for var in matching_vars}
return slim.learning.multiply_gradients(grads_and_vars,
grad_multipliers)
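# Illustrative usage (a sketch; `opt` and `loss` are assumed to exist):
#
#   grads_and_vars = opt.compute_gradients(loss)
#   grads_and_vars = multiply_gradients_matching_regex(
#       grads_and_vars, ['FeatureExtractor/.*'], multiplier=10.0)
#   # Gradients of all FeatureExtractor/* variables are scaled by 10.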
def freeze_gradients_matching_regex(grads_and_vars, regex_list):
"""Freeze gradients whose variable names match a regular expression.
Args:
grads_and_vars: A list of gradient to variable pairs (tuples).
regex_list: A list of string regular expressions.
Returns:
grads_and_vars: A list of gradient to variable pairs (tuples) that do not
contain the variables and gradients matching the regex.
"""
variables = [pair[1] for pair in grads_and_vars]
matching_vars = filter_variables(variables, regex_list, invert=True)
kept_grads_and_vars = [pair for pair in grads_and_vars
if pair[1] not in matching_vars]
for var in matching_vars:
logging.info('Freezing variable [%s]', var.op.name)
return kept_grads_and_vars
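# Illustrative usage (a sketch): drop the gradients of every bias variable so
# those variables stay fixed during training:
#
#   grads_and_vars = freeze_gradients_matching_regex(
#       grads_and_vars, ['.*/biases'])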
def get_variables_available_in_checkpoint(variables, checkpoint_path):
"""Returns the subset of variables available in the checkpoint.
Inspects given checkpoint and returns the subset of variables that are
available in it.
TODO: force input and output to be a dictionary.
Args:
variables: a list or dictionary of variables to find in checkpoint.
checkpoint_path: path to the checkpoint to restore variables from.
Returns:
A list or dictionary of variables.
Raises:
ValueError: if `variables` is not a list or dict.
"""
if isinstance(variables, list):
variable_names_map = {variable.op.name: variable for variable in variables}
elif isinstance(variables, dict):
variable_names_map = variables
else:
raise ValueError('`variables` is expected to be a list or dict.')
ckpt_reader = tf.train.NewCheckpointReader(checkpoint_path)
ckpt_vars = ckpt_reader.get_variable_to_shape_map().keys()
vars_in_ckpt = {}
  for variable_name, variable in sorted(variable_names_map.items()):
if variable_name in ckpt_vars:
vars_in_ckpt[variable_name] = variable
else:
logging.warning('Variable [%s] not available in checkpoint',
variable_name)
if isinstance(variables, list):
    return list(vars_in_ckpt.values())
return vars_in_ckpt
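# Illustrative usage (a sketch; the checkpoint path is made up):
#
#   restorable = get_variables_available_in_checkpoint(
#       tf.global_variables(), '/tmp/model.ckpt')
#   saver = tf.train.Saver(restorable)
#   # The saver then restores only the variables actually present in the
#   # checkpoint, ignoring any newly added ones.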
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.variables_helper."""
import os
import tensorflow as tf
from object_detection.utils import variables_helper
class FilterVariablesTest(tf.test.TestCase):
def _create_variables(self):
return [tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights'),
tf.Variable(1.0, name='FeatureExtractor/InceptionV3/biases'),
tf.Variable(1.0, name='StackProposalGenerator/weights'),
tf.Variable(1.0, name='StackProposalGenerator/biases')]
def test_return_all_variables_when_empty_regex(self):
variables = self._create_variables()
out_variables = variables_helper.filter_variables(variables, [''])
self.assertItemsEqual(out_variables, variables)
def test_return_variables_which_do_not_match_single_regex(self):
variables = self._create_variables()
out_variables = variables_helper.filter_variables(variables,
['FeatureExtractor/.*'])
self.assertItemsEqual(out_variables, variables[2:])
def test_return_variables_which_do_not_match_any_regex_in_list(self):
variables = self._create_variables()
out_variables = variables_helper.filter_variables(variables, [
'FeatureExtractor.*biases', 'StackProposalGenerator.*biases'
])
self.assertItemsEqual(out_variables, [variables[0], variables[2]])
def test_return_variables_matching_empty_regex_list(self):
variables = self._create_variables()
out_variables = variables_helper.filter_variables(
variables, [''], invert=True)
self.assertItemsEqual(out_variables, [])
def test_return_variables_matching_some_regex_in_list(self):
variables = self._create_variables()
out_variables = variables_helper.filter_variables(
variables,
['FeatureExtractor.*biases', 'StackProposalGenerator.*biases'],
invert=True)
self.assertItemsEqual(out_variables, [variables[1], variables[3]])
class MultiplyGradientsMatchingRegexTest(tf.test.TestCase):
def _create_grads_and_vars(self):
return [(tf.constant(1.0),
tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights')),
(tf.constant(2.0),
tf.Variable(2.0, name='FeatureExtractor/InceptionV3/biases')),
(tf.constant(3.0),
tf.Variable(3.0, name='StackProposalGenerator/weights')),
(tf.constant(4.0),
tf.Variable(4.0, name='StackProposalGenerator/biases'))]
def test_multiply_all_feature_extractor_variables(self):
grads_and_vars = self._create_grads_and_vars()
regex_list = ['FeatureExtractor/.*']
multiplier = 0.0
grads_and_vars = variables_helper.multiply_gradients_matching_regex(
grads_and_vars, regex_list, multiplier)
exp_output = [(0.0, 1.0), (0.0, 2.0), (3.0, 3.0), (4.0, 4.0)]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
output = sess.run(grads_and_vars)
self.assertItemsEqual(output, exp_output)
def test_multiply_all_bias_variables(self):
grads_and_vars = self._create_grads_and_vars()
regex_list = ['.*/biases']
multiplier = 0.0
grads_and_vars = variables_helper.multiply_gradients_matching_regex(
grads_and_vars, regex_list, multiplier)
exp_output = [(1.0, 1.0), (0.0, 2.0), (3.0, 3.0), (0.0, 4.0)]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
output = sess.run(grads_and_vars)
self.assertItemsEqual(output, exp_output)
class FreezeGradientsMatchingRegexTest(tf.test.TestCase):
def _create_grads_and_vars(self):
return [(tf.constant(1.0),
tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights')),
(tf.constant(2.0),
tf.Variable(2.0, name='FeatureExtractor/InceptionV3/biases')),
(tf.constant(3.0),
tf.Variable(3.0, name='StackProposalGenerator/weights')),
(tf.constant(4.0),
tf.Variable(4.0, name='StackProposalGenerator/biases'))]
def test_freeze_all_feature_extractor_variables(self):
grads_and_vars = self._create_grads_and_vars()
regex_list = ['FeatureExtractor/.*']
grads_and_vars = variables_helper.freeze_gradients_matching_regex(
grads_and_vars, regex_list)
exp_output = [(3.0, 3.0), (4.0, 4.0)]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
output = sess.run(grads_and_vars)
self.assertItemsEqual(output, exp_output)
class GetVariablesAvailableInCheckpointTest(tf.test.TestCase):
def test_return_all_variables_from_checkpoint(self):
variables = [
tf.Variable(1.0, name='weights'),
tf.Variable(1.0, name='biases')
]
checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
init_op = tf.global_variables_initializer()
saver = tf.train.Saver(variables)
with self.test_session() as sess:
sess.run(init_op)
saver.save(sess, checkpoint_path)
out_variables = variables_helper.get_variables_available_in_checkpoint(
variables, checkpoint_path)
self.assertItemsEqual(out_variables, variables)
def test_return_variables_available_in_checkpoint(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
graph1_variables = [
tf.Variable(1.0, name='weights'),
]
init_op = tf.global_variables_initializer()
saver = tf.train.Saver(graph1_variables)
with self.test_session() as sess:
sess.run(init_op)
saver.save(sess, checkpoint_path)
graph2_variables = graph1_variables + [tf.Variable(1.0, name='biases')]
out_variables = variables_helper.get_variables_available_in_checkpoint(
graph2_variables, checkpoint_path)
self.assertItemsEqual(out_variables, graph1_variables)
  def test_return_variables_available_in_checkpoint_with_dict_inputs(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
graph1_variables = [
tf.Variable(1.0, name='ckpt_weights'),
]
init_op = tf.global_variables_initializer()
saver = tf.train.Saver(graph1_variables)
with self.test_session() as sess:
sess.run(init_op)
saver.save(sess, checkpoint_path)
graph2_variables_dict = {
'ckpt_weights': tf.Variable(1.0, name='weights'),
'ckpt_biases': tf.Variable(1.0, name='biases')
}
out_variables = variables_helper.get_variables_available_in_checkpoint(
graph2_variables_dict, checkpoint_path)
self.assertTrue(isinstance(out_variables, dict))
self.assertItemsEqual(out_variables.keys(), ['ckpt_weights'])
self.assertTrue(out_variables['ckpt_weights'].op.name == 'weights')
if __name__ == '__main__':
tf.test.main()