Unverified Commit fd7b6887 authored by Jonathan Huang's avatar Jonathan Huang Committed by GitHub
Browse files

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection 
parents f98ec55e 1efe98bb
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Numpy BoxMaskList classes and functions."""
import numpy as np
from object_detection.utils import np_box_list
class BoxMaskList(np_box_list.BoxList):
  """Convenience wrapper for BoxList with masks.

  Extends np_box_list.BoxList so that each box also carries a full-image
  instance mask; the constructor receives boxes and masks together.
  """

  def __init__(self, box_data, mask_data):
    """Constructs a box-and-mask collection.

    Args:
      box_data: numpy array of shape [N, 4] holding box coordinates.
      mask_data: uint8 numpy array of shape [N, height, width] with values in
        {0, 1}; the masks cover the full image, so height and width equal the
        image height and width.

    Raises:
      ValueError: if bbox data is not a numpy array
      ValueError: if invalid dimensions for bbox data
      ValueError: if mask data is not a numpy array
      ValueError: if invalid dimension for mask data
    """
    # Box validation is delegated to the BoxList base class.
    super(BoxMaskList, self).__init__(box_data)
    if not isinstance(mask_data, np.ndarray):
      raise ValueError('Mask data must be a numpy array.')
    if mask_data.ndim != 3:
      raise ValueError('Invalid dimensions for mask data.')
    if mask_data.dtype != np.uint8:
      raise ValueError('Invalid data type for mask data: uint8 is required.')
    if mask_data.shape[0] != box_data.shape[0]:
      raise ValueError('There should be the same number of boxes and masks.')
    self.data['masks'] = mask_data

  def get_masks(self):
    """Returns the [N, height, width] numpy array of masks."""
    return self.get_field('masks')
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Operations for np_box_mask_list.BoxMaskList.
Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
from object_detection.utils import np_box_list_ops
from object_detection.utils import np_box_mask_list
from object_detection.utils import np_mask_ops
def box_list_to_box_mask_list(boxlist):
  """Converts a BoxList carrying a 'masks' field into a BoxMaskList.

  All extra fields of the input (other than 'masks', which the BoxMaskList
  constructor consumes) are copied over.

  Args:
    boxlist: An np_box_list.BoxList object.

  Returns:
    An np_box_mask_list.BoxMaskList object.

  Raises:
    ValueError: If boxlist does not contain `masks` as a field.
  """
  if not boxlist.has_field('masks'):
    raise ValueError('boxlist does not contain mask field.')
  converted = np_box_mask_list.BoxMaskList(
      box_data=boxlist.get(), mask_data=boxlist.get_field('masks'))
  for field_name in boxlist.get_extra_fields():
    if field_name == 'masks':
      continue  # already stored by the constructor above
    converted.data[field_name] = boxlist.get_field(field_name)
  return converted
def area(box_mask_list):
  """Computes the pixel area of each mask in the collection.

  Args:
    box_mask_list: np_box_mask_list.BoxMaskList holding N boxes and masks

  Returns:
    a numpy array of shape [N] with per-mask areas
  """
  masks = box_mask_list.get_masks()
  return np_mask_ops.area(masks)
def intersection(box_mask_list1, box_mask_list2):
  """Computes pairwise intersection areas between masks.

  Args:
    box_mask_list1: BoxMaskList holding N boxes and masks
    box_mask_list2: BoxMaskList holding M boxes and masks

  Returns:
    a numpy array of shape [N, M] with pairwise mask intersection areas
  """
  masks_a = box_mask_list1.get_masks()
  masks_b = box_mask_list2.get_masks()
  return np_mask_ops.intersection(masks_a, masks_b)
def iou(box_mask_list1, box_mask_list2):
  """Computes pairwise intersection-over-union between mask collections.

  Args:
    box_mask_list1: BoxMaskList holding N boxes and masks
    box_mask_list2: BoxMaskList holding M boxes and masks

  Returns:
    a numpy array of shape [N, M] with pairwise iou scores
  """
  masks_a = box_mask_list1.get_masks()
  masks_b = box_mask_list2.get_masks()
  return np_mask_ops.iou(masks_a, masks_b)
def ioa(box_mask_list1, box_mask_list2):
  """Computes pairwise intersection-over-area between mask collections.

  Intersection-over-area (ioa) between masks mask1 and mask2 is their
  intersection area divided by mask2's area. Note it is not symmetric:
  IOA(mask1, mask2) != IOA(mask2, mask1).

  Args:
    box_mask_list1: np_box_mask_list.BoxMaskList holding N boxes and masks
    box_mask_list2: np_box_mask_list.BoxMaskList holding M boxes and masks

  Returns:
    a numpy array of shape [N, M] with pairwise ioa scores
  """
  masks_a = box_mask_list1.get_masks()
  masks_b = box_mask_list2.get_masks()
  return np_mask_ops.ioa(masks_a, masks_b)
def gather(box_mask_list, indices, fields=None):
  """Gathers boxes from np_box_mask_list.BoxMaskList according to indices.

  By default, gather returns boxes corresponding to the input index list, as
  well as all additional fields stored in the box_mask_list (indexing into the
  first dimension). However one can optionally only gather from a
  subset of fields.

  Args:
    box_mask_list: np_box_mask_list.BoxMaskList holding N boxes
    indices: a 1-d numpy array of type int_
    fields: (optional) list of fields to also gather from. If None (default),
      all fields are gathered from. Pass an empty fields list to only gather
      the box coordinates.

  Returns:
    subbox_mask_list: a np_box_mask_list.BoxMaskList corresponding to the
      subset of the input box_mask_list specified by indices

  Raises:
    ValueError: if specified field is not contained in box_mask_list or if the
      indices are not of type int_
  """
  if fields is not None and 'masks' not in fields:
    # Build a new list rather than appending in place: the previous
    # implementation mutated the caller's `fields` argument.
    fields = list(fields) + ['masks']
  return box_list_to_box_mask_list(
      np_box_list_ops.gather(
          boxlist=box_mask_list, indices=indices, fields=fields))
def sort_by_field(box_mask_list, field,
                  order=np_box_list_ops.SortOrder.DESCEND):
  """Sorts boxes and associated fields according to a scalar field.

  A common use case is reordering the boxes according to descending scores.

  Args:
    box_mask_list: BoxMaskList holding N boxes.
    field: A BoxMaskList field for sorting and reordering the BoxMaskList.
    order: (Optional) 'descend' or 'ascend'. Default is descend.

  Returns:
    sorted_box_mask_list: A sorted BoxMaskList with the field in the specified
      order.
  """
  sorted_boxlist = np_box_list_ops.sort_by_field(
      boxlist=box_mask_list, field=field, order=order)
  return box_list_to_box_mask_list(sorted_boxlist)
def non_max_suppression(box_mask_list,
                        max_output_size=10000,
                        iou_threshold=1.0,
                        score_threshold=-10.0):
  """Non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes whose masks have high IOU (intersection over union) overlap
  (> thresh) with already selected boxes. In each iteration, the detected
  bounding box with highest score in the available pool is selected.

  Args:
    box_mask_list: np_box_mask_list.BoxMaskList holding N boxes. Must contain
      a 'scores' field representing detection scores. All scores belong to the
      same class.
    max_output_size: maximum number of retained boxes
    iou_threshold: intersection over union threshold.
    score_threshold: minimum score threshold. Remove the boxes with scores
      less than this value. Default value is set to -10. A very
      low threshold to pass pretty much all the boxes, unless
      the user sets a different score threshold.

  Returns:
    an np_box_mask_list.BoxMaskList holding M boxes where M <= max_output_size

  Raises:
    ValueError: if 'scores' field does not exist
    ValueError: if threshold is not in [0, 1]
    ValueError: if max_output_size < 0
  """
  if not box_mask_list.has_field('scores'):
    raise ValueError('Field scores does not exist')
  if iou_threshold < 0. or iou_threshold > 1.0:
    raise ValueError('IOU threshold must be in [0, 1]')
  if max_output_size < 0:
    raise ValueError('max_output_size must be bigger than 0.')
  box_mask_list = filter_scores_greater_than(box_mask_list, score_threshold)
  if box_mask_list.num_boxes() == 0:
    return box_mask_list
  box_mask_list = sort_by_field(box_mask_list, 'scores')
  # With iou_threshold == 1.0 NMS never suppresses anything: just keep the
  # top max_output_size entries (the list is already score-sorted).
  if iou_threshold == 1.0:
    if box_mask_list.num_boxes() > max_output_size:
      selected_indices = np.arange(max_output_size)
      return gather(box_mask_list, selected_indices)
    else:
      return box_mask_list
  masks = box_mask_list.get_masks()
  num_masks = box_mask_list.num_boxes()
  # is_index_valid is True only for all remaining valid boxes,
  is_index_valid = np.full(num_masks, 1, dtype=bool)
  selected_indices = []
  num_output = 0
  # Fix: `xrange` is Python 2-only and raises NameError on Python 3;
  # `range` is the compatible equivalent.
  for i in range(num_masks):
    if num_output < max_output_size:
      if is_index_valid[i]:
        num_output += 1
        selected_indices.append(i)
        is_index_valid[i] = False
        valid_indices = np.where(is_index_valid)[0]
        if valid_indices.size == 0:
          break
        # Invalidate every remaining mask that overlaps the selected one
        # by more than the threshold.
        intersect_over_union = np_mask_ops.iou(
            np.expand_dims(masks[i], axis=0), masks[valid_indices])
        intersect_over_union = np.squeeze(intersect_over_union, axis=0)
        is_index_valid[valid_indices] = np.logical_and(
            is_index_valid[valid_indices],
            intersect_over_union <= iou_threshold)
  return gather(box_mask_list, np.array(selected_indices))
def multi_class_non_max_suppression(box_mask_list, score_thresh, iou_thresh,
                                    max_output_size):
  """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes. It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Args:
    box_mask_list: np_box_mask_list.BoxMaskList holding N boxes. Must contain a
      'scores' field representing detection scores. This scores field is a
      tensor that can be 1 dimensional (in the case of a single class) or
      2-dimensional, in which case we assume that it takes the
      shape [num_boxes, num_classes]. We further assume that this rank is known
      statically and that scores.shape[1] is also known (i.e., the number of
      classes is fixed and known at graph construction time).
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (boxes that that high IOU overlap
      with previously selected boxes are removed).
    max_output_size: maximum number of retained boxes per class.

  Returns:
    a box_mask_list holding M boxes with a rank-1 scores field representing
    corresponding scores for each box with scores sorted in decreasing order
    and a rank-1 classes field representing a class label for each box.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input box_mask_list does
      not have a valid scores field.
  """
  if not 0 <= iou_thresh <= 1.0:
    raise ValueError('thresh must be between 0 and 1')
  if not isinstance(box_mask_list, np_box_mask_list.BoxMaskList):
    raise ValueError('box_mask_list must be a box_mask_list')
  if not box_mask_list.has_field('scores'):
    raise ValueError('input box_mask_list must have \'scores\' field')
  scores = box_mask_list.get_field('scores')
  if len(scores.shape) == 1:
    # Single-class scores: normalize to shape [num_boxes, 1].
    scores = np.reshape(scores, [-1, 1])
  elif len(scores.shape) == 2:
    if scores.shape[1] is None:
      # NOTE(review): numpy shapes are always concrete ints, so this branch
      # looks unreachable; presumably carried over from a TF-tensor variant.
      raise ValueError('scores field must have statically defined second '
                       'dimension')
  else:
    raise ValueError('scores field must be of rank 1 or 2')
  num_boxes = box_mask_list.num_boxes()
  num_scores = scores.shape[0]
  num_classes = scores.shape[1]
  if num_boxes != num_scores:
    raise ValueError('Incorrect scores field length: actual vs expected.')
  selected_boxes_list = []
  for class_idx in range(num_classes):
    # Fresh per-class BoxMaskList so this class's score column can be
    # attached as the 'scores' field.
    box_mask_list_and_class_scores = np_box_mask_list.BoxMaskList(
        box_data=box_mask_list.get(),
        mask_data=box_mask_list.get_masks())
    class_scores = np.reshape(scores[0:num_scores, class_idx], [-1])
    box_mask_list_and_class_scores.add_field('scores', class_scores)
    # Pre-filter by score, then run single-class NMS for this class.
    box_mask_list_filt = filter_scores_greater_than(
        box_mask_list_and_class_scores, score_thresh)
    nms_result = non_max_suppression(
        box_mask_list_filt,
        max_output_size=max_output_size,
        iou_threshold=iou_thresh,
        score_threshold=score_thresh)
    # Tag every surviving box with its class label.
    nms_result.add_field(
        'classes',
        np.zeros_like(nms_result.get_field('scores')) + class_idx)
    selected_boxes_list.append(nms_result)
  # Merge the per-class results and sort globally by score.
  selected_boxes = np_box_list_ops.concatenate(selected_boxes_list)
  sorted_boxes = np_box_list_ops.sort_by_field(selected_boxes, 'scores')
  return box_list_to_box_mask_list(boxlist=sorted_boxes)
def prune_non_overlapping_masks(box_mask_list1, box_mask_list2, minoverlap=0.0):
  """Prunes the boxes in list1 that overlap less than thresh with list2.

  For each mask in box_mask_list1, we want its IOA to be more than minoverlap
  with at least one of the masks in box_mask_list2. If it does not, we remove
  it. If the masks are not full size image, we do the pruning based on boxes.

  Args:
    box_mask_list1: np_box_mask_list.BoxMaskList holding N boxes and masks.
    box_mask_list2: np_box_mask_list.BoxMaskList holding M boxes and masks.
    minoverlap: Minimum required overlap between boxes, to count them as
      overlapping.

  Returns:
    A pruned box_mask_list with size [N', 4].
  """
  ioa_matrix = ioa(box_mask_list2, box_mask_list1)  # [M, N] tensor
  best_overlap = np.amax(ioa_matrix, axis=0)  # [N] tensor
  keep_indices = np.nonzero(best_overlap >= minoverlap)[0]
  return gather(box_mask_list1, keep_indices)
def concatenate(box_mask_lists, fields=None):
  """Concatenates a list of box_mask_lists.

  This op concatenates a list of input box_mask_lists into a larger
  box_mask_list. It also handles concatenation of box_mask_list fields as
  long as the field tensor shapes are equal except for the first dimension.

  Args:
    box_mask_lists: list of np_box_mask_list.BoxMaskList objects
    fields: optional list of fields to also concatenate. By default, all
      fields from the first BoxMaskList in the list are included in the
      concatenation.

  Returns:
    a box_mask_list with number of boxes equal to
      sum([box_mask_list.num_boxes() for box_mask_list in box_mask_list])

  Raises:
    ValueError: if box_mask_lists is invalid (i.e., is not a list, is empty, or
      contains non box_mask_list objects), or if requested fields are not
      contained in all box_mask_lists
  """
  if fields is not None and 'masks' not in fields:
    # Build a new list rather than appending in place: the previous
    # implementation mutated the caller's `fields` argument.
    fields = list(fields) + ['masks']
  return box_list_to_box_mask_list(
      np_box_list_ops.concatenate(boxlists=box_mask_lists, fields=fields))
def filter_scores_greater_than(box_mask_list, thresh):
  """Filters to keep only boxes and masks with score exceeding a threshold.

  This op keeps the collection of boxes and masks whose corresponding scores
  are strictly greater than the input threshold.

  Args:
    box_mask_list: BoxMaskList holding N boxes and masks. Must contain a
      'scores' field representing detection scores.
    thresh: scalar threshold

  Returns:
    a BoxMaskList holding M boxes and masks where M <= N

  Raises:
    ValueError: if box_mask_list not a np_box_mask_list.BoxMaskList object or
      if it does not have a scores field
  """
  if not isinstance(box_mask_list, np_box_mask_list.BoxMaskList):
    raise ValueError('box_mask_list must be a BoxMaskList')
  if not box_mask_list.has_field('scores'):
    raise ValueError('input box_mask_list must have \'scores\' field')
  scores = box_mask_list.get_field('scores')
  if len(scores.shape) > 2:
    raise ValueError('Scores should have rank 1 or 2')
  if len(scores.shape) == 2 and scores.shape[1] != 1:
    raise ValueError('Scores should have rank 1 or have shape '
                     'consistent with [None, 1]')
  # Flatten so [N, 1] scores behave like [N]. The previous implementation
  # reshaped the whole np.where() tuple, which for rank-2 scores appended the
  # (all-zero) column indices after the row indices and produced bogus
  # gather indices.
  scores = np.reshape(scores, [-1])
  high_score_indices = np.where(
      np.greater(scores, thresh))[0].astype(np.int32)
  return gather(box_mask_list, high_score_indices)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_mask_list_ops."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_box_mask_list
from object_detection.utils import np_box_mask_list_ops
class AreaRelatedTest(tf.test.TestCase):
  """Tests area/intersection/iou/ioa ops over a fixed pair of BoxMaskLists."""

  def setUp(self):
    # Collection 1: two boxes with 5x8 binary masks of area 8 and 10.
    boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
                      dtype=float)
    masks1_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks1_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 1],
                         [1, 1, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks1 = np.stack([masks1_0, masks1_1])
    # Collection 2: three boxes with 5x8 binary masks of area 8, 15 and 25.
    boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                       [0.0, 0.0, 20.0, 20.0]],
                      dtype=float)
    masks2_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks2_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks2_2 = np.array([[1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0]],
                        dtype=np.uint8)
    masks2 = np.stack([masks2_0, masks2_1, masks2_2])
    self.box_mask_list1 = np_box_mask_list.BoxMaskList(
        box_data=boxes1, mask_data=masks1)
    self.box_mask_list2 = np_box_mask_list.BoxMaskList(
        box_data=boxes2, mask_data=masks2)

  def test_area(self):
    # Areas are mask pixel counts, not box areas.
    areas = np_box_mask_list_ops.area(self.box_mask_list1)
    expected_areas = np.array([8.0, 10.0], dtype=float)
    self.assertAllClose(expected_areas, areas)

  def test_intersection(self):
    # Pairwise overlapping-pixel counts between masks1 and masks2.
    intersection = np_box_mask_list_ops.intersection(self.box_mask_list1,
                                                     self.box_mask_list2)
    expected_intersection = np.array([[8.0, 0.0, 8.0], [0.0, 9.0, 7.0]],
                                     dtype=float)
    self.assertAllClose(intersection, expected_intersection)

  def test_iou(self):
    # intersection / union, e.g. masks1_0 vs masks2_0 are identical -> 1.0.
    iou = np_box_mask_list_ops.iou(self.box_mask_list1, self.box_mask_list2)
    expected_iou = np.array(
        [[1.0, 0.0, 8.0 / 25.0], [0.0, 9.0 / 16.0, 7.0 / 28.0]], dtype=float)
    self.assertAllClose(iou, expected_iou)

  def test_ioa(self):
    # ioa normalizes by the second argument's mask areas (8, 15, 25).
    ioa21 = np_box_mask_list_ops.ioa(self.box_mask_list1, self.box_mask_list2)
    expected_ioa21 = np.array([[1.0, 0.0, 8.0/25.0],
                               [0.0, 9.0/15.0, 7.0/25.0]],
                              dtype=np.float32)
    self.assertAllClose(ioa21, expected_ioa21)
class NonMaximumSuppressionTest(tf.test.TestCase):
  """Tests for single-class and multi-class NMS over BoxMaskLists."""

  def setUp(self):
    # Two small collections of boxes with matching 3x3 binary masks.
    boxes1 = np.array(
        [[4.0, 3.0, 7.0, 6.0], [5.0, 6.0, 10.0, 10.0]], dtype=float)
    boxes2 = np.array(
        [[3.0, 4.0, 6.0, 8.0], [5.0, 6.0, 10.0, 10.0], [1.0, 1.0, 10.0, 10.0]],
        dtype=float)
    masks1 = np.array(
        [[[0, 1, 0], [1, 1, 0], [0, 0, 0]], [[0, 1, 1], [0, 1, 1], [0, 1, 1]]],
        dtype=np.uint8)
    masks2 = np.array(
        [[[0, 1, 0], [1, 1, 1], [0, 0, 0]], [[0, 1, 0], [0, 0, 1], [0, 1, 1]],
         [[0, 1, 1], [0, 1, 1], [0, 1, 1]]],
        dtype=np.uint8)
    self.boxes1 = boxes1
    self.boxes2 = boxes2
    self.masks1 = masks1
    self.masks2 = masks2

  def test_with_no_scores_field(self):
    # non_max_suppression requires a 'scores' field and must raise without it.
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=self.boxes1, mask_data=self.masks1)
    max_output_size = 3
    iou_threshold = 0.5
    with self.assertRaises(ValueError):
      np_box_mask_list_ops.non_max_suppression(
          box_mask_list, max_output_size, iou_threshold)

  def test_nms_disabled_max_output_size_equals_one(self):
    # iou_threshold == 1.0 disables suppression: only truncation to the
    # single top-scoring entry (score .9, first box/mask) remains.
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=self.boxes2, mask_data=self.masks2)
    box_mask_list.add_field('scores',
                            np.array([.9, .75, .6], dtype=float))
    max_output_size = 1
    iou_threshold = 1.  # No NMS
    expected_boxes = np.array([[3.0, 4.0, 6.0, 8.0]], dtype=float)
    expected_masks = np.array(
        [[[0, 1, 0], [1, 1, 1], [0, 0, 0]]], dtype=np.uint8)
    nms_box_mask_list = np_box_mask_list_ops.non_max_suppression(
        box_mask_list, max_output_size, iou_threshold)
    self.assertAllClose(nms_box_mask_list.get(), expected_boxes)
    self.assertAllClose(nms_box_mask_list.get_masks(), expected_masks)

  def test_multiclass_nms(self):
    # Three boxes with [3, 5] scores (3 boxes x 5 classes); per-class NMS
    # should keep the four entries above score_thresh that survive IOU
    # pruning, sorted by descending score.
    boxes = np.array(
        [[0.2, 0.4, 0.8, 0.8], [0.4, 0.2, 0.8, 0.8], [0.6, 0.0, 1.0, 1.0]],
        dtype=np.float32)
    mask0 = np.array([[0, 0, 0, 0, 0],
                      [0, 0, 1, 1, 0],
                      [0, 0, 1, 1, 0],
                      [0, 0, 1, 1, 0],
                      [0, 0, 0, 0, 0]],
                     dtype=np.uint8)
    mask1 = np.array([[0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 1, 1, 1, 0],
                      [0, 1, 1, 1, 0],
                      [0, 0, 0, 0, 0]],
                     dtype=np.uint8)
    mask2 = np.array([[0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [1, 1, 1, 1, 1],
                      [1, 1, 1, 1, 1]],
                     dtype=np.uint8)
    masks = np.stack([mask0, mask1, mask2])
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=masks)
    scores = np.array([[-0.2, 0.1, 0.5, -0.4, 0.3],
                       [0.7, -0.7, 0.6, 0.2, -0.9],
                       [0.4, 0.34, -0.9, 0.2, 0.31]],
                      dtype=np.float32)
    box_mask_list.add_field('scores', scores)
    box_mask_list_clean = np_box_mask_list_ops.multi_class_non_max_suppression(
        box_mask_list, score_thresh=0.25, iou_thresh=0.1, max_output_size=3)
    scores_clean = box_mask_list_clean.get_field('scores')
    classes_clean = box_mask_list_clean.get_field('classes')
    boxes = box_mask_list_clean.get()
    # NOTE(review): the resulting masks are fetched but never asserted below.
    masks = box_mask_list_clean.get_masks()
    expected_scores = np.array([0.7, 0.6, 0.34, 0.31])
    expected_classes = np.array([0, 2, 1, 4])
    expected_boxes = np.array([[0.4, 0.2, 0.8, 0.8],
                               [0.4, 0.2, 0.8, 0.8],
                               [0.6, 0.0, 1.0, 1.0],
                               [0.6, 0.0, 1.0, 1.0]],
                              dtype=np.float32)
    self.assertAllClose(scores_clean, expected_scores)
    self.assertAllClose(classes_clean, expected_classes)
    self.assertAllClose(boxes, expected_boxes)
if __name__ == '__main__':
  tf.test.main()  # Runs all tf.test.TestCase classes defined in this module.
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_mask_list_test."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_box_mask_list
class BoxMaskListTest(tf.test.TestCase):
  """Unit tests for the BoxMaskList container's constructor and accessors."""

  def test_invalid_box_mask_data(self):
    # box_data must be a numpy array.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=[0, 0, 1, 1],
          mask_data=np.zeros([1, 3, 3], dtype=np.uint8))
    # Integer box data is rejected -- validation happens in the BoxList base
    # class (not shown here); presumably it requires float boxes.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 0, 1, 1]], dtype=int),
          mask_data=np.zeros([1, 3, 3], dtype=np.uint8))
    # Rank-1 box data is not a valid [N, 4] array.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([0, 1, 1, 3, 4], dtype=float),
          mask_data=np.zeros([1, 3, 3], dtype=np.uint8))
    # Second box has min > max coordinates -- presumably rejected by the
    # BoxList base class; TODO confirm.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 1, 1, 3], [3, 1, 1, 5]], dtype=float),
          mask_data=np.zeros([2, 3, 3], dtype=np.uint8))
    # Number of masks (3) differs from number of boxes (2).
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float),
          mask_data=np.zeros([3, 5, 5], dtype=np.uint8))
    # Mask data must be rank 3; this is rank 2.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float),
          mask_data=np.zeros([2, 5], dtype=np.uint8))
    # Mask data must be rank 3; this is rank 4.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float),
          mask_data=np.zeros([2, 5, 5, 5], dtype=np.uint8))
    # Mask dtype must be np.uint8.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float),
          mask_data=np.zeros([2, 5, 5], dtype=np.int32))

  def test_has_field_with_existed_field(self):
    # The constructor registers both 'boxes' and 'masks' fields.
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=np.zeros([3, 5, 5], dtype=np.uint8))
    self.assertTrue(box_mask_list.has_field('boxes'))
    self.assertTrue(box_mask_list.has_field('masks'))

  def test_has_field_with_nonexisted_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=np.zeros([3, 3, 3], dtype=np.uint8))
    self.assertFalse(box_mask_list.has_field('scores'))

  def test_get_field_with_existed_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    masks = np.zeros([3, 3, 3], dtype=np.uint8)
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=masks)
    self.assertTrue(np.allclose(box_mask_list.get_field('boxes'), boxes))
    self.assertTrue(np.allclose(box_mask_list.get_field('masks'), masks))

  def test_get_field_with_nonexited_field(self):
    # Accessing a field that was never added must raise.
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    masks = np.zeros([3, 3, 3], dtype=np.uint8)
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=masks)
    with self.assertRaises(ValueError):
      box_mask_list.get_field('scores')
class AddExtraFieldTest(tf.test.TestCase):
  """Tests add_field, get_extra_fields, get_coordinates and num_boxes."""

  def setUp(self):
    # A three-box list with all-zero 3x3 masks, shared by every test below.
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    masks = np.zeros([3, 3, 3], dtype=np.uint8)
    self.box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=masks)

  def test_add_already_existed_field_bbox(self):
    # 'boxes' is created by the constructor and may not be overwritten.
    with self.assertRaises(ValueError):
      self.box_mask_list.add_field('boxes',
                                   np.array([[0, 0, 0, 1, 0]], dtype=float))

  def test_add_already_existed_field_mask(self):
    # 'masks' is created by the constructor and may not be overwritten.
    with self.assertRaises(ValueError):
      self.box_mask_list.add_field('masks',
                                   np.zeros([3, 3, 3], dtype=np.uint8))

  def test_add_invalid_field_data(self):
    # Field length must match the number of boxes (3 here); 2 and 4 fail.
    with self.assertRaises(ValueError):
      self.box_mask_list.add_field('scores', np.array([0.5, 0.7], dtype=float))
    with self.assertRaises(ValueError):
      self.box_mask_list.add_field('scores',
                                   np.array([0.5, 0.7, 0.9, 0.1], dtype=float))

  def test_add_single_dimensional_field_data(self):
    box_mask_list = self.box_mask_list
    scores = np.array([0.5, 0.7, 0.9], dtype=float)
    box_mask_list.add_field('scores', scores)
    self.assertTrue(np.allclose(scores, self.box_mask_list.get_field('scores')))

  def test_add_multi_dimensional_field_data(self):
    box_mask_list = self.box_mask_list
    labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
                      dtype=int)
    box_mask_list.add_field('labels', labels)
    self.assertTrue(np.allclose(labels, self.box_mask_list.get_field('labels')))

  def test_get_extra_fields(self):
    box_mask_list = self.box_mask_list
    # NOTE(review): assertItemsEqual is Python 2 unittest API; tf.test.TestCase
    # is assumed to provide a Python 3 alias -- confirm before upgrading.
    self.assertItemsEqual(box_mask_list.get_extra_fields(), ['masks'])
    scores = np.array([0.5, 0.7, 0.9], dtype=float)
    box_mask_list.add_field('scores', scores)
    self.assertItemsEqual(box_mask_list.get_extra_fields(), ['masks', 'scores'])
    labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
                      dtype=int)
    box_mask_list.add_field('labels', labels)
    self.assertItemsEqual(box_mask_list.get_extra_fields(),
                          ['masks', 'scores', 'labels'])

  def test_get_coordinates(self):
    y_min, x_min, y_max, x_max = self.box_mask_list.get_coordinates()
    expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float)
    expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float)
    expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float)
    expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float)
    self.assertTrue(np.allclose(y_min, expected_y_min))
    self.assertTrue(np.allclose(x_min, expected_x_min))
    self.assertTrue(np.allclose(y_max, expected_y_max))
    self.assertTrue(np.allclose(x_max, expected_x_max))

  def test_num_boxes(self):
    boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float)
    masks = np.zeros([2, 5, 5], dtype=np.uint8)
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=masks)
    expected_num_boxes = 2
    # Fix: assertEquals is a deprecated alias (removed in Python 3.12);
    # assertEqual is the canonical spelling.
    self.assertEqual(box_mask_list.num_boxes(), expected_num_boxes)
if __name__ == '__main__':
  tf.test.main()  # Runs all tf.test.TestCase classes defined in this module.
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Operations for [N, height, width] numpy arrays representing masks.
Example mask operations that are supported:
* Areas: compute mask areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
EPSILON = 1e-7
def area(masks):
  """Computes the area (foreground pixel count) of each mask.

  Args:
    masks: Numpy array with shape [N, height, width] holding N masks. Masks
      values are of type np.uint8 and values are in {0,1}.

  Returns:
    a float32 numpy array of shape [N] with per-mask areas.

  Raises:
    ValueError: If masks.dtype is not np.uint8
  """
  if masks.dtype != np.uint8:
    raise ValueError('Masks type should be np.uint8')
  # Summing over the two spatial axes counts the 1-pixels of each mask.
  return masks.sum(axis=(1, 2), dtype=np.float32)
def intersection(masks1, masks2):
  """Computes pairwise intersection areas between masks.

  Args:
    masks1: a numpy array with shape [N, height, width] holding N masks. Masks
      values are of type np.uint8 and values are in {0,1}.
    masks2: a numpy array with shape [M, height, width] holding M masks. Masks
      values are of type np.uint8 and values are in {0,1}.

  Returns:
    a float32 numpy array with shape [N, M] of pairwise intersection areas.

  Raises:
    ValueError: If masks1 and masks2 are not of type np.uint8.
  """
  if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
    raise ValueError('masks1 and masks2 should be of type np.uint8')
  # Vectorized replacement for the previous O(N*M) Python double loop:
  # broadcast to [N, M, height, width], take the elementwise minimum (the
  # intersection for {0,1}-valued masks), and sum over the spatial axes.
  # Note this materializes an [N, M, height, width] intermediate.
  return np.sum(
      np.minimum(masks1[:, np.newaxis], masks2[np.newaxis, :]),
      axis=(2, 3),
      dtype=np.float32)
def iou(masks1, masks2):
  """Computes pairwise intersection-over-union between mask collections.

  Args:
    masks1: a numpy array with shape [N, height, width] holding N masks. Masks
      values are of type np.uint8 and values are in {0,1}.
    masks2: a numpy array with shape [M, height, width] holding M masks. Masks
      values are of type np.uint8 and values are in {0,1}.

  Returns:
    a numpy array with shape [N, M] representing pairwise iou scores.

  Raises:
    ValueError: If masks1 and masks2 are not of type np.uint8.
  """
  if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
    raise ValueError('masks1 and masks2 should be of type np.uint8')
  intersect = intersection(masks1, masks2)
  areas1 = area(masks1)[:, np.newaxis]  # [N, 1]
  areas2 = area(masks2)[np.newaxis, :]  # [1, M]
  # Union = |A| + |B| - |A ∩ B|; EPSILON guards against division by zero
  # when both masks are empty.
  union = areas1 + areas2 - intersect
  return intersect / np.maximum(union, EPSILON)
def ioa(masks1, masks2):
  """Computes pairwise intersection-over-area between mask collections.

  Intersection-over-area (ioa) between two masks, mask1 and mask2 is defined as
  their intersection area over mask2's area. Note that ioa is not symmetric,
  that is, IOA(mask1, mask2) != IOA(mask2, mask1).

  Args:
    masks1: a numpy array with shape [N, height, width] holding N masks. Masks
      values are of type np.uint8 and values are in {0,1}.
    masks2: a numpy array with shape [M, height, width] holding M masks. Masks
      values are of type np.uint8 and values are in {0,1}.

  Returns:
    a numpy array with shape [N, M] representing pairwise ioa scores.

  Raises:
    ValueError: If masks1 and masks2 are not of type np.uint8.
  """
  if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
    raise ValueError('masks1 and masks2 should be of type np.uint8')
  intersect = intersection(masks1, masks2)
  # Normalize each column j by the area of masks2[j]; EPSILON guards against
  # division by zero for empty masks.
  areas = np.expand_dims(area(masks2), axis=0)
  return intersect / (areas + EPSILON)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.np_mask_ops."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_mask_ops
class MaskOpsTests(tf.test.TestCase):
  """Tests for np_mask_ops.area/intersection/iou/ioa on small fixtures."""

  def setUp(self):
    # First collection: two 5x8 binary masks (N=2).
    masks1_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks1_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 1],
                         [1, 1, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks1 = np.stack([masks1_0, masks1_1])
    # Second collection: three 5x8 binary masks (M=3). masks2_0 is identical
    # to masks1_0, so the (0, 0) iou below is exactly 1.
    masks2_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks2_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks2_2 = np.array([[1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0]],
                        dtype=np.uint8)
    masks2 = np.stack([masks2_0, masks2_1, masks2_2])
    self.masks1 = masks1
    self.masks2 = masks2

  def testArea(self):
    # Area is the count of 1-pixels per mask: 8 for masks1_0, 10 for masks1_1.
    areas = np_mask_ops.area(self.masks1)
    expected_areas = np.array([8.0, 10.0], dtype=np.float32)
    self.assertAllClose(expected_areas, areas)

  def testIntersection(self):
    # Pairwise overlap counts between the N=2 and M=3 collections.
    intersection = np_mask_ops.intersection(self.masks1, self.masks2)
    expected_intersection = np.array(
        [[8.0, 0.0, 8.0], [0.0, 9.0, 7.0]], dtype=np.float32)
    self.assertAllClose(intersection, expected_intersection)

  def testIOU(self):
    # iou = intersection / union; (0, 0) is 1 since the masks are identical.
    iou = np_mask_ops.iou(self.masks1, self.masks2)
    expected_iou = np.array(
        [[1.0, 0.0, 8.0/25.0], [0.0, 9.0 / 16.0, 7.0 / 28.0]], dtype=np.float32)
    self.assertAllClose(iou, expected_iou)

  def testIOA(self):
    # ioa normalizes the intersection by the area of the *second* mask only.
    ioa21 = np_mask_ops.ioa(self.masks1, self.masks2)
    expected_ioa21 = np.array([[1.0, 0.0, 8.0/25.0],
                               [0.0, 9.0/15.0, 7.0/25.0]],
                              dtype=np.float32)
    self.assertAllClose(ioa21, expected_ioa21)
# Run the test suite when executed as a script.
if __name__ == '__main__':
  tf.test.main()
......@@ -109,7 +109,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
matching_iou_threshold=0.5,
evaluate_corlocs=False,
metric_prefix=None,
use_weighted_mean_ap=False):
use_weighted_mean_ap=False,
evaluate_masks=False):
"""Constructor.
Args:
......@@ -125,20 +126,28 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
use_weighted_mean_ap: (optional) boolean which determines if the mean
average precision is computed directly from the scores and tp_fp_labels
of all classes.
evaluate_masks: If False, evaluation will be performed based on boxes.
If True, mask evaluation will be performed instead.
Raises:
ValueError: If the category ids are not 1-indexed.
"""
super(ObjectDetectionEvaluator, self).__init__(categories)
self._num_classes = max([cat['id'] for cat in categories])
if min(cat['id'] for cat in categories) < 1:
raise ValueError('Classes should be 1-indexed.')
self._matching_iou_threshold = matching_iou_threshold
self._use_weighted_mean_ap = use_weighted_mean_ap
self._label_id_offset = 1
self._evaluate_masks = evaluate_masks
self._evaluation = ObjectDetectionEvaluation(
self._num_classes,
num_groundtruth_classes=self._num_classes,
matching_iou_threshold=self._matching_iou_threshold,
use_weighted_mean_ap=self._use_weighted_mean_ap,
label_id_offset=self._label_id_offset)
self._image_ids = set([])
self._evaluate_corlocs = evaluate_corlocs
self._metric_prefix = (metric_prefix + '/') if metric_prefix else ''
self._metric_prefix = (metric_prefix + '_') if metric_prefix else ''
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation.
......@@ -156,16 +165,19 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
M numpy boolean array denoting whether a ground truth box is a
difficult instance or not. This field is optional to support the case
that no boxes are difficult.
standard_fields.InputDataFields.groundtruth_instance_masks: Optional
numpy array of shape [num_boxes, height, width] with values in {0, 1}.
Raises:
ValueError: On adding groundtruth for an image more than once.
ValueError: On adding groundtruth for an image more than once. Will also
raise error if instance masks are not in groundtruth dictionary.
"""
if image_id in self._image_ids:
raise ValueError('Image with id {} already added.'.format(image_id))
groundtruth_classes = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_classes]
groundtruth_classes -= self._label_id_offset
groundtruth_classes = (
groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] -
self._label_id_offset)
# If the key is not present in the groundtruth_dict or the array is empty
# (unless there are no annotations for the groundtruth on this image)
# use values from the dictionary or insert None otherwise.
......@@ -181,11 +193,20 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
logging.warn(
'image %s does not have groundtruth difficult flag specified',
image_id)
groundtruth_masks = None
if self._evaluate_masks:
if (standard_fields.InputDataFields.groundtruth_instance_masks not in
groundtruth_dict):
raise ValueError('Instance masks not in groundtruth dictionary.')
groundtruth_masks = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_instance_masks]
self._evaluation.add_single_ground_truth_image_info(
image_id,
groundtruth_dict[standard_fields.InputDataFields.groundtruth_boxes],
groundtruth_classes,
groundtruth_is_difficult_list=groundtruth_difficult)
image_key=image_id,
groundtruth_boxes=groundtruth_dict[
standard_fields.InputDataFields.groundtruth_boxes],
groundtruth_class_labels=groundtruth_classes,
groundtruth_is_difficult_list=groundtruth_difficult,
groundtruth_masks=groundtruth_masks)
self._image_ids.update([image_id])
def add_single_detected_image_info(self, image_id, detections_dict):
......@@ -202,15 +223,31 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
standard_fields.DetectionResultFields.detection_classes: integer numpy
array of shape [num_boxes] containing 1-indexed detection classes for
the boxes.
standard_fields.DetectionResultFields.detection_masks: uint8 numpy
array of shape [num_boxes, height, width] containing `num_boxes` masks
of values ranging between 0 and 1.
Raises:
ValueError: If detection masks are not in detections dictionary.
"""
detection_classes = detections_dict[
standard_fields.DetectionResultFields.detection_classes]
detection_classes -= self._label_id_offset
detection_classes = (
detections_dict[standard_fields.DetectionResultFields.detection_classes]
- self._label_id_offset)
detection_masks = None
if self._evaluate_masks:
if (standard_fields.DetectionResultFields.detection_masks not in
detections_dict):
raise ValueError('Detection masks not in detections dictionary.')
detection_masks = detections_dict[
standard_fields.DetectionResultFields.detection_masks]
self._evaluation.add_single_detected_image_info(
image_id,
detections_dict[standard_fields.DetectionResultFields.detection_boxes],
detections_dict[standard_fields.DetectionResultFields.detection_scores],
detection_classes)
image_key=image_id,
detected_boxes=detections_dict[
standard_fields.DetectionResultFields.detection_boxes],
detected_scores=detections_dict[
standard_fields.DetectionResultFields.detection_scores],
detected_class_labels=detection_classes,
detected_masks=detection_masks)
def evaluate(self):
"""Compute evaluation result.
......@@ -257,7 +294,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
def clear(self):
"""Clears the state to prepare for a fresh evaluation."""
self._evaluation = ObjectDetectionEvaluation(
self._num_classes,
num_groundtruth_classes=self._num_classes,
matching_iou_threshold=self._matching_iou_threshold,
use_weighted_mean_ap=self._use_weighted_mean_ap,
label_id_offset=self._label_id_offset)
......@@ -272,7 +309,7 @@ class PascalDetectionEvaluator(ObjectDetectionEvaluator):
categories,
matching_iou_threshold=matching_iou_threshold,
evaluate_corlocs=False,
metric_prefix='PASCAL',
metric_prefix='PascalBoxes',
use_weighted_mean_ap=False)
......@@ -295,10 +332,47 @@ class WeightedPascalDetectionEvaluator(ObjectDetectionEvaluator):
categories,
matching_iou_threshold=matching_iou_threshold,
evaluate_corlocs=False,
metric_prefix='WeightedPASCAL',
metric_prefix='WeightedPascalBoxes',
use_weighted_mean_ap=True)
class PascalInstanceSegmentationEvaluator(ObjectDetectionEvaluator):
  """A class to evaluate instance masks using PASCAL metrics."""

  def __init__(self, categories, matching_iou_threshold=0.5):
    """Constructor.

    Args:
      categories: A list of category dicts; each must contain an integer 'id'
        key (1-indexed). Other keys (e.g. 'name') follow the parent class's
        category convention.
      matching_iou_threshold: IOU threshold above which a detected mask is
        matched to a groundtruth mask (default 0.5).
    """
    # Metric names are prefixed 'PascalMasks_'; evaluate_masks=True switches
    # the parent evaluator from box-based to mask-based matching.
    super(PascalInstanceSegmentationEvaluator, self).__init__(
        categories,
        matching_iou_threshold=matching_iou_threshold,
        evaluate_corlocs=False,
        metric_prefix='PascalMasks',
        use_weighted_mean_ap=False,
        evaluate_masks=True)
class WeightedPascalInstanceSegmentationEvaluator(ObjectDetectionEvaluator):
  """A class to evaluate instance masks using weighted PASCAL metrics.

  Weighted PASCAL metrics computes the mean average precision as the average
  precision given the scores and tp_fp_labels of all classes. In comparison,
  PASCAL metrics computes the mean average precision as the mean of the
  per-class average precisions.

  This definition is very similar to the mean of the per-class average
  precisions weighted by class frequency. However, they are typically not the
  same as the average precision is not a linear function of the scores and
  tp_fp_labels.
  """

  def __init__(self, categories, matching_iou_threshold=0.5):
    """Constructor.

    Args:
      categories: A list of category dicts; each must contain an integer 'id'
        key (1-indexed). Other keys (e.g. 'name') follow the parent class's
        category convention.
      matching_iou_threshold: IOU threshold above which a detected mask is
        matched to a groundtruth mask (default 0.5).
    """
    # use_weighted_mean_ap=True pools scores/tp_fp_labels across classes;
    # evaluate_masks=True switches matching from boxes to instance masks.
    super(WeightedPascalInstanceSegmentationEvaluator, self).__init__(
        categories,
        matching_iou_threshold=matching_iou_threshold,
        evaluate_corlocs=False,
        metric_prefix='WeightedPascalMasks',
        use_weighted_mean_ap=True,
        evaluate_masks=True)
class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
"""A class to evaluate detections using Open Images V2 metrics.
......@@ -348,9 +422,9 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
if image_id in self._image_ids:
raise ValueError('Image with id {} already added.'.format(image_id))
groundtruth_classes = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_classes]
groundtruth_classes -= self._label_id_offset
groundtruth_classes = (
groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] -
self._label_id_offset)
# If the key is not present in the groundtruth_dict or the array is empty
# (unless there are no annotations for the groundtruth on this image)
# use values from the dictionary or insert None otherwise.
......@@ -392,19 +466,29 @@ class ObjectDetectionEvaluation(object):
nms_max_output_boxes=10000,
use_weighted_mean_ap=False,
label_id_offset=0):
if num_groundtruth_classes < 1:
raise ValueError('Need at least 1 groundtruth class for evaluation.')
self.per_image_eval = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
nms_max_output_boxes)
num_groundtruth_classes=num_groundtruth_classes,
matching_iou_threshold=matching_iou_threshold,
nms_iou_threshold=nms_iou_threshold,
nms_max_output_boxes=nms_max_output_boxes)
self.num_class = num_groundtruth_classes
self.use_weighted_mean_ap = use_weighted_mean_ap
self.label_id_offset = label_id_offset
self.groundtruth_boxes = {}
self.groundtruth_class_labels = {}
self.groundtruth_masks = {}
self.groundtruth_is_difficult_list = {}
self.groundtruth_is_group_of_list = {}
self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)
self._initialize_detections()
def _initialize_detections(self):
self.detection_keys = set()
self.scores_per_class = [[] for _ in range(self.num_class)]
self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
......@@ -415,24 +499,16 @@ class ObjectDetectionEvaluation(object):
self.recalls_per_class = []
self.corloc_per_class = np.ones(self.num_class, dtype=float)
self.use_weighted_mean_ap = use_weighted_mean_ap
def clear_detections(self):
self.detection_keys = {}
self.scores_per_class = [[] for _ in range(self.num_class)]
self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
self.average_precision_per_class = np.zeros(self.num_class, dtype=float)
self.precisions_per_class = []
self.recalls_per_class = []
self.corloc_per_class = np.ones(self.num_class, dtype=float)
self._initialize_detections()
def add_single_ground_truth_image_info(self,
image_key,
groundtruth_boxes,
groundtruth_class_labels,
groundtruth_is_difficult_list=None,
groundtruth_is_group_of_list=None):
groundtruth_is_group_of_list=None,
groundtruth_masks=None):
"""Adds groundtruth for a single image to be used for evaluation.
Args:
......@@ -448,6 +524,9 @@ class ObjectDetectionEvaluation(object):
groundtruth_is_group_of_list: A length M numpy boolean array denoting
whether a ground truth box is a group-of box or not. To support
the case that no boxes are groups-of, it is by default set as None.
groundtruth_masks: uint8 numpy array of shape
[num_boxes, height, width] containing `num_boxes` groundtruth masks.
The mask values range from 0 to 1.
"""
if image_key in self.groundtruth_boxes:
logging.warn(
......@@ -457,6 +536,7 @@ class ObjectDetectionEvaluation(object):
self.groundtruth_boxes[image_key] = groundtruth_boxes
self.groundtruth_class_labels[image_key] = groundtruth_class_labels
self.groundtruth_masks[image_key] = groundtruth_masks
if groundtruth_is_difficult_list is None:
num_boxes = groundtruth_boxes.shape[0]
groundtruth_is_difficult_list = np.zeros(num_boxes, dtype=bool)
......@@ -474,7 +554,8 @@ class ObjectDetectionEvaluation(object):
groundtruth_is_group_of_list.astype(dtype=bool))
def add_single_detected_image_info(self, image_key, detected_boxes,
detected_scores, detected_class_labels):
detected_scores, detected_class_labels,
detected_masks=None):
"""Adds detections for a single image to be used for evaluation.
Args:
......@@ -486,6 +567,9 @@ class ObjectDetectionEvaluation(object):
detection scores for the boxes.
detected_class_labels: integer numpy array of shape [num_boxes] containing
0-indexed detection classes for the boxes.
detected_masks: np.uint8 numpy array of shape [num_boxes, height, width]
containing `num_boxes` detection masks with values ranging
between 0 and 1.
Raises:
ValueError: if the number of boxes, scores and class labels differ in
......@@ -508,6 +592,10 @@ class ObjectDetectionEvaluation(object):
if image_key in self.groundtruth_boxes:
groundtruth_boxes = self.groundtruth_boxes[image_key]
groundtruth_class_labels = self.groundtruth_class_labels[image_key]
# Masks are popped instead of look up. The reason is that we do not want
# to keep all masks in memory which can cause memory overflow.
groundtruth_masks = self.groundtruth_masks.pop(
image_key)
groundtruth_is_difficult_list = self.groundtruth_is_difficult_list[
image_key]
groundtruth_is_group_of_list = self.groundtruth_is_group_of_list[
......@@ -515,13 +603,23 @@ class ObjectDetectionEvaluation(object):
else:
groundtruth_boxes = np.empty(shape=[0, 4], dtype=float)
groundtruth_class_labels = np.array([], dtype=int)
if detected_masks is None:
groundtruth_masks = None
else:
groundtruth_masks = np.empty(shape=[0, 1, 1], dtype=float)
groundtruth_is_difficult_list = np.array([], dtype=bool)
groundtruth_is_group_of_list = np.array([], dtype=bool)
scores, tp_fp_labels, is_class_correctly_detected_in_image = (
self.per_image_eval.compute_object_detection_metrics(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_list, groundtruth_is_group_of_list))
detected_boxes=detected_boxes,
detected_scores=detected_scores,
detected_class_labels=detected_class_labels,
groundtruth_boxes=groundtruth_boxes,
groundtruth_class_labels=groundtruth_class_labels,
groundtruth_is_difficult_list=groundtruth_is_difficult_list,
groundtruth_is_group_of_list=groundtruth_is_group_of_list,
detected_masks=detected_masks,
groundtruth_masks=groundtruth_masks))
for i in range(self.num_class):
if scores[i].shape[0] > 0:
......
......@@ -89,12 +89,12 @@ class OpenImagesV2EvaluationTest(tf.test.TestCase):
})
metrics = oiv2_evaluator.evaluate()
self.assertAlmostEqual(
metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
metrics['OpenImagesV2_PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
self.assertAlmostEqual(
metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
metrics['OpenImagesV2_PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
self.assertAlmostEqual(
metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['OpenImagesV2/Precision/mAP@0.5IOU'],
metrics['OpenImagesV2_PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['OpenImagesV2_Precision/mAP@0.5IOU'],
0.05555555)
oiv2_evaluator.clear()
self.assertFalse(oiv2_evaluator._image_ids)
......@@ -102,7 +102,7 @@ class OpenImagesV2EvaluationTest(tf.test.TestCase):
class PascalEvaluationTest(tf.test.TestCase):
def test_returns_correct_metric_values(self):
def test_returns_correct_metric_values_on_boxes(self):
categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
......@@ -158,12 +158,138 @@ class PascalEvaluationTest(tf.test.TestCase):
metrics = pascal_evaluator.evaluate()
self.assertAlmostEqual(
metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
metrics['PascalBoxes_PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
self.assertAlmostEqual(
metrics['PascalBoxes_PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
self.assertAlmostEqual(
metrics['PascalBoxes_PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['PascalBoxes_Precision/mAP@0.5IOU'],
0.05555555)
pascal_evaluator.clear()
self.assertFalse(pascal_evaluator._image_ids)
def test_returns_correct_metric_values_on_masks(self):
categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
# Add groundtruth
pascal_evaluator = (
object_detection_evaluation.PascalInstanceSegmentationEvaluator(
categories))
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
groundtruth_masks_1_0 = np.array([[1, 0, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks_1_1 = np.array([[0, 0, 1, 0],
[0, 0, 1, 0],
[0, 0, 1, 0]], dtype=np.uint8)
groundtruth_masks_1_2 = np.array([[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 0, 0]], dtype=np.uint8)
groundtruth_masks1 = np.stack(
[groundtruth_masks_1_0, groundtruth_masks_1_1, groundtruth_masks_1_2],
axis=0)
pascal_evaluator.add_single_ground_truth_image_info(
image_key1, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_instance_masks:
groundtruth_masks1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1,
standard_fields.InputDataFields.groundtruth_difficult:
np.array([], dtype=bool)
})
image_key2 = 'img2'
groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
[10, 10, 12, 12]], dtype=float)
groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int)
groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
groundtruth_masks_2_0 = np.array([[1, 1, 1, 1],
[0, 0, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks_2_1 = np.array([[0, 0, 0, 0],
[1, 1, 1, 1],
[0, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks_2_2 = np.array([[0, 0, 0, 0],
[0, 0, 0, 0],
[1, 1, 1, 1]], dtype=np.uint8)
groundtruth_masks2 = np.stack(
[groundtruth_masks_2_0, groundtruth_masks_2_1, groundtruth_masks_2_2],
axis=0)
pascal_evaluator.add_single_ground_truth_image_info(
image_key2, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes2,
standard_fields.InputDataFields.groundtruth_instance_masks:
groundtruth_masks2,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels2,
standard_fields.InputDataFields.groundtruth_difficult:
groundtruth_is_difficult_list2
})
image_key3 = 'img3'
groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_class_labels3 = np.array([2], dtype=int)
groundtruth_masks_3_0 = np.array([[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]], dtype=np.uint8)
groundtruth_masks3 = np.stack([groundtruth_masks_3_0], axis=0)
pascal_evaluator.add_single_ground_truth_image_info(
image_key3, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes3,
standard_fields.InputDataFields.groundtruth_instance_masks:
groundtruth_masks3,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels3
})
# Add detections
image_key = 'img2'
detected_boxes = np.array(
[[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
dtype=float)
detected_class_labels = np.array([1, 1, 3], dtype=int)
detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
detected_masks_0 = np.array([[1, 1, 1, 1],
[0, 0, 1, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_1 = np.array([[1, 0, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_2 = np.array([[0, 1, 0, 0],
[0, 1, 1, 0],
[0, 1, 0, 0]], dtype=np.uint8)
detected_masks = np.stack(
[detected_masks_0, detected_masks_1, detected_masks_2], axis=0)
pascal_evaluator.add_single_detected_image_info(
image_key, {
standard_fields.DetectionResultFields.detection_boxes:
detected_boxes,
standard_fields.DetectionResultFields.detection_masks:
detected_masks,
standard_fields.DetectionResultFields.detection_scores:
detected_scores,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels
})
metrics = pascal_evaluator.evaluate()
self.assertAlmostEqual(
metrics['PascalMasks_PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
self.assertAlmostEqual(
metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
metrics['PascalMasks_PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
self.assertAlmostEqual(
metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['PASCAL/Precision/mAP@0.5IOU'], 0.05555555)
metrics['PascalMasks_PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['PascalMasks_Precision/mAP@0.5IOU'],
0.05555555)
pascal_evaluator.clear()
self.assertFalse(pascal_evaluator._image_ids)
......@@ -363,6 +489,11 @@ class ObjectDetectionEvaluationTest(tf.test.TestCase):
self.od_eval.add_single_detected_image_info(
image_key, detected_boxes, detected_scores, detected_class_labels)
def test_value_error_on_zero_classes(self):
with self.assertRaises(ValueError):
object_detection_evaluation.ObjectDetectionEvaluation(
num_groundtruth_classes=0)
def test_add_single_ground_truth_image_info(self):
expected_num_gt_instances_per_class = np.array([3, 1, 1], dtype=int)
expected_num_gt_imgs_per_class = np.array([2, 1, 2], dtype=int)
......
......@@ -23,6 +23,7 @@ import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils
from object_detection.utils import static_shape
......@@ -67,7 +68,7 @@ def normalized_to_image_coordinates(normalized_boxes, image_shape,
box_list.BoxList(normalized_boxes),
image_shape[1], image_shape[2], check_range=False).get()
absolute_boxes = tf.map_fn(
absolute_boxes = shape_utils.static_or_dynamic_map_fn(
_to_absolute_coordinates,
elems=(normalized_boxes),
dtype=tf.float32,
......@@ -115,6 +116,28 @@ def meshgrid(x, y):
return xgrid, ygrid
def fixed_padding(inputs, kernel_size, rate=1):
  """Pads the input along the spatial dimensions independently of input size.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
      Should be a positive integer.
    rate: An integer, rate for atrous convolution.

  Returns:
    output: A tensor of size [batch, height_out, width_out, channels] with the
      input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
  """
  # Effective kernel extent once atrous holes are accounted for.
  effective_size = kernel_size + (kernel_size - 1) * (rate - 1)
  total = effective_size - 1
  pad_front = total // 2
  pad_back = total - pad_front
  return tf.pad(inputs, [[0, 0], [pad_front, pad_back],
                         [pad_front, pad_back], [0, 0]])
def pad_to_multiple(tensor, multiple):
"""Returns the tensor zero padded to the specified multiple.
......@@ -209,8 +232,10 @@ def padded_one_hot_encoding(indices, depth, left_pad):
raise ValueError('`left_pad` must be a non-negative integer.')
if depth == 0:
return None
if len(indices.get_shape().as_list()) != 1:
raise ValueError('`indices` must have rank 1')
rank = len(indices.get_shape().as_list())
if rank != 1:
raise ValueError('`indices` must have rank 1, but has rank=%s' % rank)
def one_hot_and_pad():
one_hot = tf.cast(tf.one_hot(tf.cast(indices, tf.int64), depth,
......@@ -284,6 +309,11 @@ def indices_to_dense_vector(indices,
[zeros, values])
def reduce_sum_trailing_dimensions(tensor, ndims):
  """Computes sum across all dimensions following first `ndims` dimensions."""
  trailing_axes = tuple(range(ndims, tensor.shape.ndims))
  return tf.reduce_sum(tensor, axis=trailing_axes)
def retain_groundtruth(tensor_dict, valid_indices):
"""Retains groundtruth by valid indices.
......@@ -627,7 +657,7 @@ def position_sensitive_crop_regions(image,
position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
# Then average over spatial positions within the bins.
position_sensitive_features = tf.reduce_mean(
position_sensitive_features, [1, 2], keep_dims=True)
position_sensitive_features, [1, 2], keepdims=True)
else:
# Reorder height/width to depth channel.
block_size = bin_crop_size[0]
......@@ -739,3 +769,53 @@ def merge_boxes_with_multiple_labels(boxes, classes, num_classes):
class_encodings = tf.reshape(class_encodings, [-1, num_classes])
merged_box_indices = tf.reshape(merged_box_indices, [-1])
return merged_boxes, class_encodings, merged_box_indices
def nearest_neighbor_upsampling(input_tensor, scale):
  """Nearest neighbor upsampling implementation.

  Maps an input tensor of shape [batch_size, height, width, channels] to
  [batch_size, height * scale, width * scale, channels]. Only reshape and
  tile ops are used, to keep the op compatible with certain hardware.

  Args:
    input_tensor: A float32 tensor of size [batch, height_in, width_in,
      channels].
    scale: An integer multiple to scale resolution of input data.

  Returns:
    data_up: A float32 tensor of size
      [batch, height_in*scale, width_in*scale, channels].
  """
  (batch, height, width, channels) = (
      shape_utils.combined_static_and_dynamic_shape(input_tensor))
  # Insert a singleton axis after each spatial dimension, tile those axes by
  # `scale`, then collapse back to 4-D — equivalent to nearest-neighbor resize.
  expanded = tf.reshape(input_tensor, [batch, height, 1, width, 1, channels])
  tiled = tf.tile(expanded, [1, 1, scale, 1, scale, 1])
  return tf.reshape(tiled, [batch, height * scale, width * scale, channels])
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
  """Matrix multiplication based implementation of tf.gather on zeroth axis.

  TODO(rathodv, jonathanhuang): enable sparse matmul option.

  Args:
    params: A float32 Tensor. The tensor from which to gather values.
      Must be at least rank 1.
    indices: A Tensor. Must be one of the following types: int32, int64.
      Must be in range [0, params.shape[0])
    scope: A name for the operation (optional).

  Returns:
    A Tensor. Has the same type as params. Values from params gathered
    from indices given by indices, with shape indices.shape + params.shape[1:].
  """
  with tf.name_scope(scope, 'MatMulGather'):
    num_rows = params.shape[0]
    # Flatten trailing dimensions so every params[i] becomes one matrix row.
    flat_params = tf.reshape(params, [num_rows, -1])
    # A one-hot selector row per index turns the gather into a single matmul.
    selector = tf.one_hot(indices, num_rows)
    flat_gathered = tf.matmul(selector, flat_params)
    return tf.reshape(flat_gathered,
                      indices.shape.concatenate(params.shape[1:]))
......@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.utils import ops
from object_detection.utils import test_case
class NormalizedToImageCoordinatesTest(tf.test.TestCase):
......@@ -42,6 +43,18 @@ class NormalizedToImageCoordinatesTest(tf.test.TestCase):
self.assertAllEqual(absolute_boxes, expected_boxes)
class ReduceSumTrailingDimensions(tf.test.TestCase):
def test_reduce_sum_trailing_dimensions(self):
input_tensor = tf.placeholder(tf.float32, shape=[None, None, None])
reduced_tensor = ops.reduce_sum_trailing_dimensions(input_tensor, ndims=2)
with self.test_session() as sess:
reduced_np = sess.run(reduced_tensor,
feed_dict={input_tensor: np.ones((2, 2, 2),
np.float32)})
self.assertAllClose(reduced_np, 2 * np.ones((2, 2), np.float32))
class MeshgridTest(tf.test.TestCase):
def test_meshgrid_numpy_comparison(self):
......@@ -83,6 +96,30 @@ class MeshgridTest(tf.test.TestCase):
self.assertEqual(ygrid_output[yind + xind], y[yind])
class OpsTestFixedPadding(tf.test.TestCase):
def test_3x3_kernel(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.fixed_padding(tensor, 3)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 4, 4, 1), padded_tensor_out.shape)
def test_5x5_kernel(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.fixed_padding(tensor, 5)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 6, 6, 1), padded_tensor_out.shape)
def test_3x3_atrous_kernel(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.fixed_padding(tensor, 3, 2)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 6, 6, 1), padded_tensor_out.shape)
class OpsTestPadToMultiple(tf.test.TestCase):
def test_zero_padding(self):
......@@ -1128,5 +1165,66 @@ class MergeBoxesWithMultipleLabelsTest(tf.test.TestCase):
self.assertAllEqual(np_merged_box_indices.shape, [0])
class NearestNeighborUpsamplingTest(test_case.TestCase):
def test_upsampling(self):
def graph_fn(inputs):
custom_op_output = ops.nearest_neighbor_upsampling(inputs, scale=2)
tf_op_output = tf.image.resize_images(
inputs, [4, 4], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
return (custom_op_output, tf_op_output)
inputs = np.reshape(np.arange(2**4), [2, 2, 2, 2])
(custom_op_output, tf_op_output) = self.execute(graph_fn, [inputs])
self.assertAllClose(custom_op_output, tf_op_output)
class MatmulGatherOnZerothAxis(test_case.TestCase):
  """Tests for ops.matmul_gather_on_zeroth_axis."""

  def _gather(self, params, indices):
    """Runs the matmul-based gather and returns its numpy output."""
    def graph_fn(params, indices):
      return ops.matmul_gather_on_zeroth_axis(params, indices)
    return self.execute(graph_fn, [params, indices])

  def test_gather_2d(self):
    params = np.array([[1, 2, 3, 4],
                       [5, 6, 7, 8],
                       [9, 10, 11, 12],
                       [0, 1, 0, 0]], dtype=np.float32)
    indices = np.array([2, 2, 1])
    # Row 2 is gathered twice, then row 1.
    expected = np.array([[9, 10, 11, 12], [9, 10, 11, 12], [5, 6, 7, 8]])
    self.assertAllClose(self._gather(params, indices), expected)

  def test_gather_3d(self):
    params = np.array([[[1, 2], [3, 4]],
                       [[5, 6], [7, 8]],
                       [[9, 10], [11, 12]],
                       [[0, 1], [0, 0]]], dtype=np.float32)
    indices = np.array([0, 3, 1])
    expected = np.array([[[1, 2], [3, 4]],
                         [[0, 1], [0, 0]],
                         [[5, 6], [7, 8]]])
    self.assertAllClose(self._gather(params, indices), expected)

  def test_gather_with_many_indices(self):
    params = np.array([[1, 2, 3, 4],
                       [5, 6, 7, 8],
                       [9, 10, 11, 12],
                       [0, 1, 0, 0]], dtype=np.float32)
    # Gathering index 0 six times replicates row 0 six times.
    indices = np.array([0, 0, 0, 0, 0, 0])
    expected = np.array(6 * [[1, 2, 3, 4]])
    self.assertAllClose(self._gather(params, indices), expected)
if __name__ == '__main__':
tf.test.main()
......@@ -17,11 +17,15 @@
Annotate each detected result as true positives or false positive according to
a predefined IOU ratio. Non Maximum Suppression is used by default. Multi class
detection is supported by default.
Based on the settings, per image evaluation is either performed on boxes or
on object masks.
"""
import numpy as np
from object_detection.utils import np_box_list
from object_detection.utils import np_box_list_ops
from object_detection.utils import np_box_mask_list
from object_detection.utils import np_box_mask_list_ops
class PerImageEvaluation(object):
......@@ -49,7 +53,8 @@ class PerImageEvaluation(object):
def compute_object_detection_metrics(
self, detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_lists, groundtruth_is_group_of_list):
groundtruth_is_difficult_list, groundtruth_is_group_of_list,
detected_masks=None, groundtruth_masks=None):
"""Evaluates detections as being tp, fp or ignored from a single image.
The evaluation is done in two stages:
......@@ -70,10 +75,15 @@ class PerImageEvaluation(object):
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
groundtruth_is_difficult_list: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
groundtruth_is_group_of_list: A boolean numpy array of length M denoting
whether a ground truth box has group-of tag
detected_masks: (optional) A uint8 numpy array of shape
[N, height, width]. If not None, the metrics will be computed based
on masks.
groundtruth_masks: (optional) A uint8 numpy array of shape
[M, height, width].
Returns:
scores: A list of C float numpy arrays. Each numpy array is of
......@@ -86,22 +96,35 @@ class PerImageEvaluation(object):
shape [C, 1], indicating whether the corresponding class has at least
one instance being correctly detected in the image
"""
detected_boxes, detected_scores, detected_class_labels = (
detected_boxes, detected_scores, detected_class_labels, detected_masks = (
self._remove_invalid_boxes(detected_boxes, detected_scores,
detected_class_labels))
detected_class_labels, detected_masks))
scores, tp_fp_labels = self._compute_tp_fp(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_lists, groundtruth_is_group_of_list)
detected_boxes=detected_boxes,
detected_scores=detected_scores,
detected_class_labels=detected_class_labels,
groundtruth_boxes=groundtruth_boxes,
groundtruth_class_labels=groundtruth_class_labels,
groundtruth_is_difficult_list=groundtruth_is_difficult_list,
groundtruth_is_group_of_list=groundtruth_is_group_of_list,
detected_masks=detected_masks,
groundtruth_masks=groundtruth_masks)
is_class_correctly_detected_in_image = self._compute_cor_loc(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels)
detected_boxes=detected_boxes,
detected_scores=detected_scores,
detected_class_labels=detected_class_labels,
groundtruth_boxes=groundtruth_boxes,
groundtruth_class_labels=groundtruth_class_labels,
detected_masks=detected_masks,
groundtruth_masks=groundtruth_masks)
return scores, tp_fp_labels, is_class_correctly_detected_in_image
def _compute_cor_loc(self, detected_boxes, detected_scores,
detected_class_labels, groundtruth_boxes,
groundtruth_class_labels):
groundtruth_class_labels, detected_masks=None,
groundtruth_masks=None):
"""Compute CorLoc score for object detection result.
Args:
......@@ -116,28 +139,51 @@ class PerImageEvaluation(object):
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
detected_masks: (optional) A uint8 numpy array of shape
[N, height, width]. If not None, the scores will be computed based
on masks.
groundtruth_masks: (optional) A uint8 numpy array of shape
[M, height, width].
Returns:
is_class_correctly_detected_in_image: a numpy integer array of
shape [C, 1], indicating whether the corresponding class has at least
one instance being correctly detected in the image
Raises:
ValueError: If detected masks is not None but groundtruth masks are None,
or the other way around.
"""
if (detected_masks is not None and
groundtruth_masks is None) or (detected_masks is None and
groundtruth_masks is not None):
raise ValueError(
'If `detected_masks` is provided, then `groundtruth_masks` should '
'also be provided.'
)
is_class_correctly_detected_in_image = np.zeros(
self.num_groundtruth_classes, dtype=int)
for i in range(self.num_groundtruth_classes):
gt_boxes_at_ith_class = groundtruth_boxes[groundtruth_class_labels ==
i, :]
detected_boxes_at_ith_class = detected_boxes[detected_class_labels ==
i, :]
detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
(gt_boxes_at_ith_class, gt_masks_at_ith_class,
detected_boxes_at_ith_class, detected_scores_at_ith_class,
detected_masks_at_ith_class) = self._get_ith_class_arrays(
detected_boxes, detected_scores, detected_masks,
detected_class_labels, groundtruth_boxes, groundtruth_masks,
groundtruth_class_labels, i)
is_class_correctly_detected_in_image[i] = (
self._compute_is_aclass_correctly_detected_in_image(
detected_boxes_at_ith_class, detected_scores_at_ith_class,
gt_boxes_at_ith_class))
self._compute_is_class_correctly_detected_in_image(
detected_boxes=detected_boxes_at_ith_class,
detected_scores=detected_scores_at_ith_class,
groundtruth_boxes=gt_boxes_at_ith_class,
detected_masks=detected_masks_at_ith_class,
groundtruth_masks=gt_masks_at_ith_class))
return is_class_correctly_detected_in_image
def _compute_is_aclass_correctly_detected_in_image(
self, detected_boxes, detected_scores, groundtruth_boxes):
def _compute_is_class_correctly_detected_in_image(
self, detected_boxes, detected_scores, groundtruth_boxes,
detected_masks=None, groundtruth_masks=None):
"""Compute CorLoc score for a single class.
Args:
......@@ -147,6 +193,11 @@ class PerImageEvaluation(object):
score
groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
box coordinates
detected_masks: (optional) A np.uint8 numpy array of shape
[N, height, width]. If not None, the scores will be computed based
on masks.
groundtruth_masks: (optional) A np.uint8 numpy array of shape
[M, height, width].
Returns:
is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether a
......@@ -155,18 +206,30 @@ class PerImageEvaluation(object):
if detected_boxes.size > 0:
if groundtruth_boxes.size > 0:
max_score_id = np.argmax(detected_scores)
detected_boxlist = np_box_list.BoxList(
np.expand_dims(detected_boxes[max_score_id, :], axis=0))
gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
mask_mode = False
if detected_masks is not None and groundtruth_masks is not None:
mask_mode = True
if mask_mode:
detected_boxlist = np_box_mask_list.BoxMaskList(
box_data=np.expand_dims(detected_boxes[max_score_id], axis=0),
mask_data=np.expand_dims(detected_masks[max_score_id], axis=0))
gt_boxlist = np_box_mask_list.BoxMaskList(
box_data=groundtruth_boxes, mask_data=groundtruth_masks)
iou = np_box_mask_list_ops.iou(detected_boxlist, gt_boxlist)
else:
detected_boxlist = np_box_list.BoxList(
np.expand_dims(detected_boxes[max_score_id, :], axis=0))
gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
if np.max(iou) >= self.matching_iou_threshold:
return 1
return 0
def _compute_tp_fp(self, detected_boxes, detected_scores,
detected_class_labels, groundtruth_boxes,
groundtruth_class_labels, groundtruth_is_difficult_lists,
groundtruth_is_group_of_list):
groundtruth_class_labels, groundtruth_is_difficult_list,
groundtruth_is_group_of_list,
detected_masks=None, groundtruth_masks=None):
"""Labels true/false positives of detections of an image across all classes.
Args:
......@@ -181,10 +244,15 @@ class PerImageEvaluation(object):
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
groundtruth_is_difficult_list: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
groundtruth_is_group_of_list: A boolean numpy array of length M denoting
whether a ground truth box has group-of tag
detected_masks: (optional) A np.uint8 numpy array of shape
[N, height, width]. If not None, the scores will be computed based
on masks.
groundtruth_masks: (optional) A np.uint8 numpy array of shape
[M, height, width].
Returns:
result_scores: A list of float numpy arrays. Each numpy array is of
......@@ -193,37 +261,134 @@ class PerImageEvaluation(object):
result_tp_fp_labels: A list of boolean numpy array. Each numpy array is of
shape [K, 1], representing K True/False positive label of object
instances detected with class label c
Raises:
ValueError: If detected masks is not None but groundtruth masks are None,
or the other way around.
"""
if detected_masks is not None and groundtruth_masks is None:
raise ValueError(
'Detected masks is available but groundtruth masks is not.')
if detected_masks is None and groundtruth_masks is not None:
raise ValueError(
'Groundtruth masks is available but detected masks is not.')
result_scores = []
result_tp_fp_labels = []
for i in range(self.num_groundtruth_classes):
gt_boxes_at_ith_class = groundtruth_boxes[(groundtruth_class_labels == i
), :]
groundtruth_is_difficult_list_at_ith_class = (
groundtruth_is_difficult_lists[groundtruth_class_labels == i])
groundtruth_is_difficult_list[groundtruth_class_labels == i])
groundtruth_is_group_of_list_at_ith_class = (
groundtruth_is_group_of_list[groundtruth_class_labels == i])
detected_boxes_at_ith_class = detected_boxes[(detected_class_labels == i
), :]
detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
(gt_boxes_at_ith_class, gt_masks_at_ith_class,
detected_boxes_at_ith_class, detected_scores_at_ith_class,
detected_masks_at_ith_class) = self._get_ith_class_arrays(
detected_boxes, detected_scores, detected_masks,
detected_class_labels, groundtruth_boxes, groundtruth_masks,
groundtruth_class_labels, i)
scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
detected_boxes_at_ith_class, detected_scores_at_ith_class,
gt_boxes_at_ith_class, groundtruth_is_difficult_list_at_ith_class,
groundtruth_is_group_of_list_at_ith_class)
detected_boxes=detected_boxes_at_ith_class,
detected_scores=detected_scores_at_ith_class,
groundtruth_boxes=gt_boxes_at_ith_class,
groundtruth_is_difficult_list=
groundtruth_is_difficult_list_at_ith_class,
groundtruth_is_group_of_list=
groundtruth_is_group_of_list_at_ith_class,
detected_masks=detected_masks_at_ith_class,
groundtruth_masks=gt_masks_at_ith_class)
result_scores.append(scores)
result_tp_fp_labels.append(tp_fp_labels)
return result_scores, result_tp_fp_labels
def _remove_invalid_boxes(self, detected_boxes, detected_scores,
detected_class_labels):
valid_indices = np.logical_and(detected_boxes[:, 0] < detected_boxes[:, 2],
detected_boxes[:, 1] < detected_boxes[:, 3])
return (detected_boxes[valid_indices, :], detected_scores[valid_indices],
detected_class_labels[valid_indices])
def _get_overlaps_and_scores_mask_mode(
    self, detected_boxes, detected_scores, detected_masks, groundtruth_boxes,
    groundtruth_masks, groundtruth_is_group_of_list):
  """Computes overlaps and scores between detected and groundtruth masks.

  Args:
    detected_boxes: A numpy array of shape [N, 4] representing detected box
      coordinates
    detected_scores: A 1-d numpy array of length N representing classification
      score
    detected_masks: A uint8 numpy array of shape [N, height, width]. If not
      None, the scores will be computed based on masks.
    groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
      box coordinates
    groundtruth_masks: A uint8 numpy array of shape [M, height, width].
    groundtruth_is_group_of_list: A boolean numpy array of length M denoting
      whether a ground truth box has group-of tag. If a groundtruth box
      is group-of box, every detection matching this box is ignored.

  Returns:
    iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
      gt_non_group_of_boxlist.num_boxes() == 0 it will be None.
    ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
      gt_group_of_boxlist.num_boxes() == 0 it will be None.
    scores: The score of the detected boxlist.
    num_boxes: Number of non-maximum suppressed detected boxes.
  """
  # Run NMS on the detections (boxes + masks) before computing overlaps.
  nms_boxlist = np_box_mask_list.BoxMaskList(
      box_data=detected_boxes, mask_data=detected_masks)
  nms_boxlist.add_field('scores', detected_scores)
  nms_boxlist = np_box_mask_list_ops.non_max_suppression(
      nms_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
  # Split groundtruth into group-of and non-group-of collections.
  group_of = groundtruth_is_group_of_list
  gt_non_group_of_boxlist = np_box_mask_list.BoxMaskList(
      box_data=groundtruth_boxes[~group_of],
      mask_data=groundtruth_masks[~group_of])
  gt_group_of_boxlist = np_box_mask_list.BoxMaskList(
      box_data=groundtruth_boxes[group_of],
      mask_data=groundtruth_masks[group_of])
  # IOU against non-group-of groundtruth; IOA against group-of groundtruth.
  iou = np_box_mask_list_ops.iou(nms_boxlist, gt_non_group_of_boxlist)
  ioa = np_box_mask_list_ops.ioa(gt_group_of_boxlist, nms_boxlist)
  return (iou, ioa, nms_boxlist.get_field('scores'),
          nms_boxlist.num_boxes())
def _get_overlaps_and_scores_box_mode(
    self,
    detected_boxes,
    detected_scores,
    groundtruth_boxes,
    groundtruth_is_group_of_list):
  """Computes overlaps and scores between detected and groundtruth boxes.

  Args:
    detected_boxes: A numpy array of shape [N, 4] representing detected box
      coordinates
    detected_scores: A 1-d numpy array of length N representing classification
      score
    groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
      box coordinates
    groundtruth_is_group_of_list: A boolean numpy array of length M denoting
      whether a ground truth box has group-of tag. If a groundtruth box
      is group-of box, every detection matching this box is ignored.

  Returns:
    iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
      gt_non_group_of_boxlist.num_boxes() == 0 it will be None.
    ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
      gt_group_of_boxlist.num_boxes() == 0 it will be None.
    scores: The score of the detected boxlist.
    num_boxes: Number of non-maximum suppressed detected boxes.
  """
  # Run NMS on the detections before computing overlaps.
  nms_boxlist = np_box_list.BoxList(detected_boxes)
  nms_boxlist.add_field('scores', detected_scores)
  nms_boxlist = np_box_list_ops.non_max_suppression(
      nms_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
  # Split groundtruth into group-of and non-group-of collections.
  group_of = groundtruth_is_group_of_list
  gt_non_group_of_boxlist = np_box_list.BoxList(groundtruth_boxes[~group_of])
  gt_group_of_boxlist = np_box_list.BoxList(groundtruth_boxes[group_of])
  # IOU against non-group-of groundtruth; IOA against group-of groundtruth.
  iou = np_box_list_ops.iou(nms_boxlist, gt_non_group_of_boxlist)
  ioa = np_box_list_ops.ioa(gt_group_of_boxlist, nms_boxlist)
  return (iou, ioa, nms_boxlist.get_field('scores'),
          nms_boxlist.num_boxes())
def _compute_tp_fp_for_single_class(
self, detected_boxes, detected_scores, groundtruth_boxes,
groundtruth_is_difficult_list, groundtruth_is_group_of_list):
groundtruth_is_difficult_list, groundtruth_is_group_of_list,
detected_masks=None, groundtruth_masks=None):
"""Labels boxes detected with the same class from the same image as tp/fp.
Args:
......@@ -240,6 +405,11 @@ class PerImageEvaluation(object):
groundtruth_is_group_of_list: A boolean numpy array of length M denoting
whether a ground truth box has group-of tag. If a groundtruth box
is group-of box, every detection matching this box is ignored.
detected_masks: (optional) A uint8 numpy array of shape
[N, height, width]. If not None, the scores will be computed based
on masks.
groundtruth_masks: (optional) A uint8 numpy array of shape
[M, height, width].
Returns:
Two arrays of the same size, containing all boxes that were evaluated as
......@@ -249,25 +419,37 @@ class PerImageEvaluation(object):
scores: A numpy array representing the detection scores.
tp_fp_labels: a boolean numpy array indicating whether a detection is a
true positive.
"""
if detected_boxes.size == 0:
return np.array([], dtype=float), np.array([], dtype=bool)
detected_boxlist = np_box_list.BoxList(detected_boxes)
detected_boxlist.add_field('scores', detected_scores)
detected_boxlist = np_box_list_ops.non_max_suppression(
detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
scores = detected_boxlist.get_field('scores')
mask_mode = False
if detected_masks is not None and groundtruth_masks is not None:
mask_mode = True
if mask_mode:
(iou, ioa, scores,
num_detected_boxes) = self._get_overlaps_and_scores_mask_mode(
detected_boxes=detected_boxes,
detected_scores=detected_scores,
detected_masks=detected_masks,
groundtruth_boxes=groundtruth_boxes,
groundtruth_masks=groundtruth_masks,
groundtruth_is_group_of_list=groundtruth_is_group_of_list)
else:
(iou, ioa, scores,
num_detected_boxes) = self._get_overlaps_and_scores_box_mode(
detected_boxes=detected_boxes,
detected_scores=detected_scores,
groundtruth_boxes=groundtruth_boxes,
groundtruth_is_group_of_list=groundtruth_is_group_of_list)
if groundtruth_boxes.size == 0:
return scores, np.zeros(detected_boxlist.num_boxes(), dtype=bool)
return scores, np.zeros(num_detected_boxes, dtype=bool)
tp_fp_labels = np.zeros(detected_boxlist.num_boxes(), dtype=bool)
is_matched_to_difficult_box = np.zeros(
detected_boxlist.num_boxes(), dtype=bool)
is_matched_to_group_of_box = np.zeros(
detected_boxlist.num_boxes(), dtype=bool)
tp_fp_labels = np.zeros(num_detected_boxes, dtype=bool)
is_matched_to_difficult_box = np.zeros(num_detected_boxes, dtype=bool)
is_matched_to_group_of_box = np.zeros(num_detected_boxes, dtype=bool)
# The evaluation is done in two stages:
# 1. All detections are matched to non group-of boxes; true positives are
......@@ -276,16 +458,12 @@ class PerImageEvaluation(object):
# group-of boxes and ignored if matched.
# Tp-fp evaluation for non-group of boxes (if any).
gt_non_group_of_boxlist = np_box_list.BoxList(
groundtruth_boxes[~groundtruth_is_group_of_list, :])
if gt_non_group_of_boxlist.num_boxes() > 0:
if iou.shape[1] > 0:
groundtruth_nongroup_of_is_difficult_list = groundtruth_is_difficult_list[
~groundtruth_is_group_of_list]
iou = np_box_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist)
max_overlap_gt_ids = np.argmax(iou, axis=1)
is_gt_box_detected = np.zeros(
gt_non_group_of_boxlist.num_boxes(), dtype=bool)
for i in range(detected_boxlist.num_boxes()):
is_gt_box_detected = np.zeros(iou.shape[1], dtype=bool)
for i in range(num_detected_boxes):
gt_id = max_overlap_gt_ids[i]
if iou[i, gt_id] >= self.matching_iou_threshold:
if not groundtruth_nongroup_of_is_difficult_list[gt_id]:
......@@ -296,12 +474,9 @@ class PerImageEvaluation(object):
is_matched_to_difficult_box[i] = True
# Tp-fp evaluation for group of boxes.
gt_group_of_boxlist = np_box_list.BoxList(
groundtruth_boxes[groundtruth_is_group_of_list, :])
if gt_group_of_boxlist.num_boxes() > 0:
ioa = np_box_list_ops.ioa(gt_group_of_boxlist, detected_boxlist)
if ioa.shape[0] > 0:
max_overlap_group_of_gt = np.max(ioa, axis=0)
for i in range(detected_boxlist.num_boxes()):
for i in range(num_detected_boxes):
if (not tp_fp_labels[i] and not is_matched_to_difficult_box[i] and
max_overlap_group_of_gt[i] >= self.matching_iou_threshold):
is_matched_to_group_of_box[i] = True
......@@ -310,3 +485,83 @@ class PerImageEvaluation(object):
& ~is_matched_to_group_of_box], tp_fp_labels[
~is_matched_to_difficult_box
& ~is_matched_to_group_of_box]
def _get_ith_class_arrays(self, detected_boxes, detected_scores,
detected_masks, detected_class_labels,
groundtruth_boxes, groundtruth_masks,
groundtruth_class_labels, class_index):
"""Returns numpy arrays belonging to class with index `class_index`.
Args:
detected_boxes: A numpy array containing detected boxes.
detected_scores: A numpy array containing detected scores.
detected_masks: A numpy array containing detected masks.
detected_class_labels: A numpy array containing detected class labels.
groundtruth_boxes: A numpy array containing groundtruth boxes.
groundtruth_masks: A numpy array containing groundtruth masks.
groundtruth_class_labels: A numpy array containing groundtruth class
labels.
class_index: An integer index.
Returns:
gt_boxes_at_ith_class: A numpy array containing groundtruth boxes labeled
as ith class.
gt_masks_at_ith_class: A numpy array containing groundtruth masks labeled
as ith class.
detected_boxes_at_ith_class: A numpy array containing detected boxes
corresponding to the ith class.
detected_scores_at_ith_class: A numpy array containing detected scores
corresponding to the ith class.
detected_masks_at_ith_class: A numpy array containing detected masks
corresponding to the ith class.
"""
selected_groundtruth = (groundtruth_class_labels == class_index)
gt_boxes_at_ith_class = groundtruth_boxes[selected_groundtruth]
if groundtruth_masks is not None:
gt_masks_at_ith_class = groundtruth_masks[selected_groundtruth]
else:
gt_masks_at_ith_class = None
selected_detections = (detected_class_labels == class_index)
detected_boxes_at_ith_class = detected_boxes[selected_detections]
detected_scores_at_ith_class = detected_scores[selected_detections]
if detected_masks is not None:
detected_masks_at_ith_class = detected_masks[selected_detections]
else:
detected_masks_at_ith_class = None
return (gt_boxes_at_ith_class, gt_masks_at_ith_class,
detected_boxes_at_ith_class, detected_scores_at_ith_class,
detected_masks_at_ith_class)
def _remove_invalid_boxes(self, detected_boxes, detected_scores,
detected_class_labels, detected_masks=None):
"""Removes entries with invalid boxes.
A box is invalid if either its xmax is smaller than its xmin, or its ymax
is smaller than its ymin.
Args:
detected_boxes: A float numpy array of size [num_boxes, 4] containing box
coordinates in [ymin, xmin, ymax, xmax] format.
detected_scores: A float numpy array of size [num_boxes].
detected_class_labels: A int32 numpy array of size [num_boxes].
detected_masks: A uint8 numpy array of size [num_boxes, height, width].
Returns:
valid_detected_boxes: A float numpy array of size [num_valid_boxes, 4]
containing box coordinates in [ymin, xmin, ymax, xmax] format.
valid_detected_scores: A float numpy array of size [num_valid_boxes].
valid_detected_class_labels: A int32 numpy array of size
[num_valid_boxes].
valid_detected_masks: A uint8 numpy array of size
[num_valid_boxes, height, width].
"""
valid_indices = np.logical_and(detected_boxes[:, 0] < detected_boxes[:, 2],
detected_boxes[:, 1] < detected_boxes[:, 3])
detected_boxes = detected_boxes[valid_indices]
detected_scores = detected_scores[valid_indices]
detected_class_labels = detected_class_labels[valid_indices]
if detected_masks is not None:
detected_masks = detected_masks[valid_indices]
return [
detected_boxes, detected_scores, detected_class_labels, detected_masks
]
......@@ -35,10 +35,29 @@ class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
detected_masks_0 = np.array([[0, 1, 1, 0],
[0, 0, 1, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_1 = np.array([[1, 0, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_2 = np.array([[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 0, 0]], dtype=np.uint8)
self.detected_masks = np.stack(
[detected_masks_0, detected_masks_1, detected_masks_2], axis=0)
self.groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 10, 10]],
dtype=float)
def test_match_to_not_difficult_box(self):
groundtruth_masks_0 = np.array([[1, 1, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks_1 = np.array([[0, 0, 0, 1],
[0, 0, 0, 1],
[0, 0, 0, 1]], dtype=np.uint8)
self.groundtruth_masks = np.stack(
[groundtruth_masks_0, groundtruth_masks_1], axis=0)
def test_match_to_gt_box_0(self):
groundtruth_groundtruth_is_difficult_list = np.array([False, True],
dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array(
......@@ -52,7 +71,25 @@ class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_match_to_difficult_box(self):
def test_mask_match_to_gt_mask_0(self):
  """In mask mode the top-scoring detection is a TP; the rest are FPs."""
  is_difficult = np.array([False, True], dtype=bool)
  is_group_of = np.array([False, False], dtype=bool)
  scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
      self.detected_boxes,
      self.detected_scores,
      self.groundtruth_boxes,
      is_difficult,
      is_group_of,
      detected_masks=self.detected_masks,
      groundtruth_masks=self.groundtruth_masks)
  self.assertTrue(np.allclose(scores, np.array([0.8, 0.6, 0.5], dtype=float)))
  self.assertTrue(
      np.allclose(tp_fp_labels, np.array([True, False, False], dtype=bool)))
def test_match_to_gt_box_1(self):
groundtruth_groundtruth_is_difficult_list = np.array([True, False],
dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array(
......@@ -66,6 +103,24 @@ class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_mask_match_to_gt_mask_1(self):
  """With groundtruth 0 marked difficult, no detection counts as a TP."""
  is_difficult = np.array([True, False], dtype=bool)
  is_group_of = np.array([False, False], dtype=bool)
  scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
      self.detected_boxes,
      self.detected_scores,
      self.groundtruth_boxes,
      is_difficult,
      is_group_of,
      detected_masks=self.detected_masks,
      groundtruth_masks=self.groundtruth_masks)
  self.assertTrue(np.allclose(scores, np.array([0.6, 0.5], dtype=float)))
  self.assertTrue(
      np.allclose(tp_fp_labels, np.array([False, False], dtype=bool)))
class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
......@@ -81,8 +136,31 @@ class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
self.detected_boxes = np.array(
[[0, 0, 1, 1], [0, 0, 2, 1], [0, 0, 3, 1]], dtype=float)
self.detected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
detected_masks_0 = np.array([[0, 1, 1, 0],
[0, 0, 1, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_1 = np.array([[1, 0, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_2 = np.array([[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 0, 0]], dtype=np.uint8)
self.detected_masks = np.stack(
[detected_masks_0, detected_masks_1, detected_masks_2], axis=0)
self.groundtruth_boxes = np.array(
[[0, 0, 1, 1], [0, 0, 5, 5], [10, 10, 20, 20]], dtype=float)
groundtruth_masks_0 = np.array([[1, 0, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks_1 = np.array([[0, 0, 1, 0],
[0, 0, 1, 0],
[0, 0, 1, 0]], dtype=np.uint8)
groundtruth_masks_2 = np.array([[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 0, 0]], dtype=np.uint8)
self.groundtruth_masks = np.stack(
[groundtruth_masks_0, groundtruth_masks_1, groundtruth_masks_2], axis=0)
def test_match_to_non_group_of_and_group_of_box(self):
groundtruth_groundtruth_is_difficult_list = np.array(
......@@ -98,6 +176,24 @@ class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_mask_match_to_non_group_of_and_group_of_box(self):
  """One detection survives group-of filtering and matches in mask mode."""
  is_difficult = np.array([False, False, False], dtype=bool)
  is_group_of = np.array([False, True, True], dtype=bool)
  scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
      self.detected_boxes,
      self.detected_scores,
      self.groundtruth_boxes,
      is_difficult,
      is_group_of,
      detected_masks=self.detected_masks,
      groundtruth_masks=self.groundtruth_masks)
  self.assertTrue(np.allclose(scores, np.array([0.6], dtype=float)))
  self.assertTrue(np.allclose(tp_fp_labels, np.array([True], dtype=bool)))
def test_match_two_to_group_of_box(self):
groundtruth_groundtruth_is_difficult_list = np.array(
[False, False, False], dtype=bool)
......@@ -112,32 +208,61 @@ class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_mask_match_two_to_group_of_box(self):
  """Only the non-group-of match remains after mask-mode evaluation."""
  is_difficult = np.array([False, False, False], dtype=bool)
  is_group_of = np.array([True, False, True], dtype=bool)
  scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
      self.detected_boxes,
      self.detected_scores,
      self.groundtruth_boxes,
      is_difficult,
      is_group_of,
      detected_masks=self.detected_masks,
      groundtruth_masks=self.groundtruth_masks)
  self.assertTrue(np.allclose(scores, np.array([0.8], dtype=float)))
  self.assertTrue(np.allclose(tp_fp_labels, np.array([True], dtype=bool)))
class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
def setUp(self):
num_groundtruth_classes = 1
matching_iou_threshold1 = 0.5
matching_iou_threshold2 = 0.1
matching_iou_threshold_high_iou = 0.5
matching_iou_threshold_low_iou = 0.1
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
self.eval1 = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold1, nms_iou_threshold,
nms_max_output_boxes)
self.eval_high_iou = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold_high_iou,
nms_iou_threshold, nms_max_output_boxes)
self.eval2 = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold2, nms_iou_threshold,
nms_max_output_boxes)
self.eval_low_iou = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold_low_iou,
nms_iou_threshold, nms_max_output_boxes)
self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
detected_masks_0 = np.array([[0, 1, 1, 0],
[0, 0, 1, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_1 = np.array([[1, 0, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_2 = np.array([[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 0, 0]], dtype=np.uint8)
self.detected_masks = np.stack(
[detected_masks_0, detected_masks_1, detected_masks_2], axis=0)
def test_no_true_positives(self):
groundtruth_boxes = np.array([[100, 100, 105, 105]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
scores, tp_fp_labels = self.eval_high_iou._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list)
......@@ -146,11 +271,32 @@ class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_mask_no_true_positives(self):
groundtruth_boxes = np.array([[100, 100, 105, 105]], dtype=float)
groundtruth_masks_0 = np.array([[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]], dtype=np.uint8)
groundtruth_masks = np.stack([groundtruth_masks_0], axis=0)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
scores, tp_fp_labels = self.eval_high_iou._compute_tp_fp_for_single_class(
self.detected_boxes,
self.detected_scores,
groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list,
detected_masks=self.detected_masks,
groundtruth_masks=groundtruth_masks)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, False, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_one_true_positives_with_large_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
scores, tp_fp_labels = self.eval_high_iou._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list)
......@@ -159,11 +305,32 @@ class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_mask_one_true_positives_with_large_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_masks_0 = np.array([[1, 0, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks = np.stack([groundtruth_masks_0], axis=0)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
scores, tp_fp_labels = self.eval_high_iou._compute_tp_fp_for_single_class(
self.detected_boxes,
self.detected_scores,
groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list,
detected_masks=self.detected_masks,
groundtruth_masks=groundtruth_masks)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([True, False, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_one_true_positives_with_very_small_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
scores, tp_fp_labels = self.eval2._compute_tp_fp_for_single_class(
scores, tp_fp_labels = self.eval_low_iou._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list)
......@@ -177,7 +344,7 @@ class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array(
[False, False], dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
scores, tp_fp_labels = self.eval_high_iou._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list)
......
......@@ -17,6 +17,8 @@
import tensorflow as tf
from object_detection.utils import static_shape
def _is_tensor(t):
"""Returns a boolean indicating whether the input is a tensor.
......@@ -125,12 +127,183 @@ def combined_static_and_dynamic_shape(tensor):
Returns:
A list of size tensor.shape.ndims containing integers or a scalar tensor.
"""
static_shape = tensor.shape.as_list()
dynamic_shape = tf.shape(tensor)
static_tensor_shape = tensor.shape.as_list()
dynamic_tensor_shape = tf.shape(tensor)
combined_shape = []
for index, dim in enumerate(static_shape):
for index, dim in enumerate(static_tensor_shape):
if dim is not None:
combined_shape.append(dim)
else:
combined_shape.append(dynamic_shape[index])
combined_shape.append(dynamic_tensor_shape[index])
return combined_shape
def static_or_dynamic_map_fn(fn, elems, dtype=None,
                             parallel_iterations=32, back_prop=True):
  """Runs map_fn as a (static) for loop when possible.

  This function rewrites the map_fn as an explicit unstack input -> for loop
  over function calls -> stack result combination. This allows our graphs to
  be acyclic when the batch size is static.
  For comparison, see https://www.tensorflow.org/api_docs/python/tf/map_fn.

  Note that `static_or_dynamic_map_fn` currently is not *fully* interchangeable
  with the default tf.map_fn function as it does not accept nested inputs (only
  Tensors or lists of Tensors).  Likewise, the output of `fn` can only be a
  Tensor or list of Tensors.

  TODO: make this function fully interchangeable with tf.map_fn.

  Args:
    fn: The callable to be performed. It accepts one argument, which will have
      the same structure as elems. Its output must have the
      same structure as elems.
    elems: A tensor or list of tensors, each of which will
      be unpacked along their first dimension. The sequence of the
      resulting slices will be applied to fn.
    dtype: (optional) The output type(s) of fn. If fn returns a structure of
      Tensors differing from the structure of elems, then dtype is not optional
      and must have the same structure as the output of fn.
    parallel_iterations: (optional) number of batch items to process in
      parallel. This flag is only used if the native tf.map_fn is used
      and defaults to 32 instead of 10 (unlike the standard tf.map_fn default).
    back_prop: (optional) True enables support for back propagation.
      This flag is only used if the native tf.map_fn is used.

  Returns:
    A tensor or sequence of tensors. Each tensor packs the
    results of applying fn to tensors unpacked from elems along the first
    dimension, from first to last.

  Raises:
    ValueError: if `elems` is not a Tensor or a list of Tensors.
    ValueError: if `fn` does not return a Tensor or list of Tensors.
  """
  if isinstance(elems, list):
    for elem in elems:
      if not isinstance(elem, tf.Tensor):
        raise ValueError('`elems` must be a Tensor or list of Tensors.')
    elem_shapes = [elem.shape.as_list() for elem in elems]
    # Fall back on tf.map_fn if shapes of each entry of `elems` are None or fail
    # to all be the same size along the batch dimension.
    for elem_shape in elem_shapes:
      if (not elem_shape or not elem_shape[0]
          or elem_shape[0] != elem_shapes[0][0]):
        return tf.map_fn(fn, elems, dtype, parallel_iterations, back_prop)
    # Static batch size: unroll into an explicit per-slice loop of fn calls.
    arg_tuples = zip(*[tf.unstack(elem) for elem in elems])
    outputs = [fn(arg_tuple) for arg_tuple in arg_tuples]
  else:
    if not isinstance(elems, tf.Tensor):
      raise ValueError('`elems` must be a Tensor or list of Tensors.')
    elems_shape = elems.shape.as_list()
    if not elems_shape or not elems_shape[0]:
      return tf.map_fn(fn, elems, dtype, parallel_iterations, back_prop)
    outputs = [fn(arg) for arg in tf.unstack(elems)]
  # Stack `outputs`, which is a list of Tensors or list of lists of Tensors
  if all([isinstance(output, tf.Tensor) for output in outputs]):
    return tf.stack(outputs)
  else:
    if all([isinstance(output, list) for output in outputs]):
      if all([all(
          [isinstance(entry, tf.Tensor) for entry in output_list])
              for output_list in outputs]):
        # `fn` returned lists; transpose and stack element-wise.
        return [tf.stack(output_tuple) for output_tuple in zip(*outputs)]
  raise ValueError('`fn` should return a Tensor or a list of Tensors.')
def check_min_image_dim(min_dim, image_tensor):
  """Checks that the image width/height are greater than some number.

  This function is used to check that the width and height of an image are
  above a certain value. If the image shape is static, this function will
  perform the check at graph construction time. Otherwise, if the image shape
  varies, an Assertion control dependency will be added to the graph.

  Args:
    min_dim: The minimum number of pixels along the width and height of the
      image.
    image_tensor: The image tensor to check size for.

  Returns:
    If `image_tensor` has dynamic size, return `image_tensor` with a Assert
    control dependency. Otherwise returns image_tensor.

  Raises:
    ValueError: if `image_tensor`'s' width or height is smaller than `min_dim`.
  """
  static_image_shape = image_tensor.get_shape()
  height = static_shape.get_height(static_image_shape)
  width = static_shape.get_width(static_image_shape)

  if height is None or width is None:
    # Shape only known at run time: defer the check to a graph assertion.
    dynamic_image_shape = tf.shape(image_tensor)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(dynamic_image_shape[1], min_dim),
            tf.greater_equal(dynamic_image_shape[2], min_dim)),
        ['image size must be >= {} in both height and width.'.format(min_dim)])
    with tf.control_dependencies([shape_assert]):
      return tf.identity(image_tensor)

  # Fully static shape: validate immediately at graph construction time.
  if height < min_dim or width < min_dim:
    raise ValueError(
        'image size must be >= %d in both height and width; image dim = %d,%d' %
        (min_dim, height, width))
  return image_tensor
def assert_shape_equal(shape_a, shape_b):
  """Asserts that shape_a and shape_b are equal.

  If the shapes are static, raises a ValueError when the shapes
  mismatch.

  If the shapes are dynamic, raises a tf InvalidArgumentError when the shapes
  mismatch.

  Args:
    shape_a: a list containing shape of the first tensor.
    shape_b: a list containing shape of the second tensor.

  Returns:
    Either a tf.no_op() when shapes are all static and a tf.assert_equal() op
    when the shapes are dynamic.

  Raises:
    ValueError: When shapes are both static and unequal.
  """
  a_fully_static = all(isinstance(dim, int) for dim in shape_a)
  b_fully_static = all(isinstance(dim, int) for dim in shape_b)
  if a_fully_static and b_fully_static:
    # Every dimension is known at graph construction time; compare directly.
    if shape_a != shape_b:
      raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
    return tf.no_op()
  # At least one dimension is a tensor; defer the comparison to run time.
  return tf.assert_equal(shape_a, shape_b)
def assert_shape_equal_along_first_dimension(shape_a, shape_b):
  """Asserts that shape_a and shape_b are the same along the 0th-dimension.

  If the shapes are static, raises a ValueError when the shapes
  mismatch.

  If the shapes are dynamic, raises a tf InvalidArgumentError when the shapes
  mismatch.

  Args:
    shape_a: a list containing shape of the first tensor.
    shape_b: a list containing shape of the second tensor.

  Returns:
    Either a tf.no_op() when shapes are all static and a tf.assert_equal() op
    when the shapes are dynamic.

  Raises:
    ValueError: When shapes are both static and unequal.
  """
  first_dim_a = shape_a[0]
  first_dim_b = shape_b[0]
  if isinstance(first_dim_a, int) and isinstance(first_dim_b, int):
    # Both leading dimensions known statically; compare at construction time.
    if first_dim_a != first_dim_b:
      raise ValueError('Unequal first dimension {}, {}'.format(
          first_dim_a, first_dim_b))
    return tf.no_op()
  # At least one leading dimension is dynamic; emit a run-time assertion.
  return tf.assert_equal(first_dim_a, first_dim_b)
......@@ -15,6 +15,7 @@
"""Tests for object_detection.utils.shape_utils."""
import numpy as np
import tensorflow as tf
from object_detection.utils import shape_utils
......@@ -123,5 +124,198 @@ class UtilTest(tf.test.TestCase):
self.assertListEqual(combined_shape[1:], [2, 3])
class StaticOrDynamicMapFnTest(tf.test.TestCase):
  """Tests for shape_utils.static_or_dynamic_map_fn.

  The tests check both the computed values and, via op names in the default
  graph, whether the native tf.map_fn fallback was used ('map*' ops present)
  or the call was unrolled statically (no 'map*' ops).
  """

  def test_with_dynamic_shape(self):
    # Unknown batch dimension -> must fall back to native tf.map_fn.
    def fn(input_tensor):
      return tf.reduce_sum(input_tensor)
    input_tensor = tf.placeholder(tf.float32, shape=(None, 2))
    map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result1 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[1, 2], [3, 1], [0, 4]]})
      result2 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[-1, 1], [0, 9]]})
      self.assertAllEqual(result1, [3, 4, 4])
      self.assertAllEqual(result2, [0, 9])

  def test_with_static_shape(self):
    # Static batch dimension -> unrolled; no native map_fn ops expected.
    def fn(input_tensor):
      return tf.reduce_sum(input_tensor)
    input_tensor = tf.constant([[1, 2], [3, 1], [0, 4]], dtype=tf.float32)
    map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result = sess.run(map_fn_output)
      self.assertAllEqual(result, [3, 4, 4])

  def test_with_multiple_dynamic_shapes(self):
    # List input with dynamic batch dims -> native map_fn fallback.
    def fn(elems):
      input_tensor, scalar_index_tensor = elems
      return tf.reshape(tf.slice(input_tensor, scalar_index_tensor, [1]), [])
    input_tensor = tf.placeholder(tf.float32, shape=(None, 3))
    scalar_index_tensor = tf.placeholder(tf.int32, shape=(None, 1))
    map_fn_output = shape_utils.static_or_dynamic_map_fn(
        fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result1 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[1, 2, 3], [4, 5, -1], [0, 6, 9]],
              scalar_index_tensor: [[0], [2], [1]],
          })
      result2 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[-1, 1, 0], [3, 9, 30]],
              scalar_index_tensor: [[1], [0]]
          })
      self.assertAllEqual(result1, [1, -1, 6])
      self.assertAllEqual(result2, [1, 3])

  def test_with_multiple_static_shapes(self):
    # List input with matching static batch dims -> unrolled.
    def fn(elems):
      input_tensor, scalar_index_tensor = elems
      return tf.reshape(tf.slice(input_tensor, scalar_index_tensor, [1]), [])
    input_tensor = tf.constant([[1, 2, 3], [4, 5, -1], [0, 6, 9]],
                               dtype=tf.float32)
    scalar_index_tensor = tf.constant([[0], [2], [1]], dtype=tf.int32)
    map_fn_output = shape_utils.static_or_dynamic_map_fn(
        fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result = sess.run(map_fn_output)
      self.assertAllEqual(result, [1, -1, 6])

  def test_fails_with_nested_input(self):
    # Nested (list-of-list) inputs are documented as unsupported.
    def fn(input_tensor):
      return input_tensor
    input_tensor1 = tf.constant([1])
    input_tensor2 = tf.constant([2])
    with self.assertRaisesRegexp(
        ValueError, '`elems` must be a Tensor or list of Tensors.'):
      shape_utils.static_or_dynamic_map_fn(
          fn, [input_tensor1, [input_tensor2]], dtype=tf.float32)
class CheckMinImageShapeTest(tf.test.TestCase):
  """Tests for shape_utils.check_min_image_dim."""

  def test_check_min_image_dim_static_shape(self):
    # Static shape: the check happens at graph construction time and a
    # too-small image raises a ValueError immediately.
    input_tensor = tf.constant(np.zeros([1, 42, 42, 3]))
    _ = shape_utils.check_min_image_dim(33, input_tensor)

    with self.assertRaisesRegexp(
        ValueError, 'image size must be >= 64 in both height and width.'):
      _ = shape_utils.check_min_image_dim(64, input_tensor)

  def test_check_min_image_dim_dynamic_shape(self):
    # Dynamic shape: the check is deferred to a run-time tf.Assert.
    input_placeholder = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    image_tensor = shape_utils.check_min_image_dim(33, input_placeholder)

    with self.test_session() as sess:
      sess.run(image_tensor,
               feed_dict={input_placeholder: np.zeros([1, 42, 42, 3])})
      with self.assertRaises(tf.errors.InvalidArgumentError):
        sess.run(image_tensor,
                 feed_dict={input_placeholder: np.zeros([1, 32, 32, 3])})
class AssertShapeEqualTest(tf.test.TestCase):
  """Tests for shape_utils.assert_shape_equal and its first-dimension variant.

  Static-shape mismatches are expected to raise ValueError at graph
  construction time; dynamic-shape mismatches are expected to raise
  tf.errors.InvalidArgumentError when the assert op is run.
  """

  def test_unequal_static_shape_raises_exception(self):
    shape_a = tf.constant(np.zeros([4, 2, 2, 1]))
    shape_b = tf.constant(np.zeros([4, 2, 3, 1]))
    with self.assertRaisesRegexp(
        ValueError, 'Unequal shapes'):
      shape_utils.assert_shape_equal(
          shape_utils.combined_static_and_dynamic_shape(shape_a),
          shape_utils.combined_static_and_dynamic_shape(shape_b))

  def test_equal_static_shape_succeeds(self):
    shape_a = tf.constant(np.zeros([4, 2, 2, 1]))
    shape_b = tf.constant(np.zeros([4, 2, 2, 1]))
    with self.test_session() as sess:
      op = shape_utils.assert_shape_equal(
          shape_utils.combined_static_and_dynamic_shape(shape_a),
          shape_utils.combined_static_and_dynamic_shape(shape_b))
      sess.run(op)

  def test_unequal_dynamic_shape_raises_tf_assert(self):
    tensor_a = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    tensor_b = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    op = shape_utils.assert_shape_equal(
        shape_utils.combined_static_and_dynamic_shape(tensor_a),
        shape_utils.combined_static_and_dynamic_shape(tensor_b))
    with self.test_session() as sess:
      with self.assertRaises(tf.errors.InvalidArgumentError):
        sess.run(op, feed_dict={tensor_a: np.zeros([1, 2, 2, 3]),
                                tensor_b: np.zeros([1, 4, 4, 3])})

  def test_equal_dynamic_shape_succeeds(self):
    tensor_a = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    tensor_b = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    op = shape_utils.assert_shape_equal(
        shape_utils.combined_static_and_dynamic_shape(tensor_a),
        shape_utils.combined_static_and_dynamic_shape(tensor_b))
    with self.test_session() as sess:
      sess.run(op, feed_dict={tensor_a: np.zeros([1, 2, 2, 3]),
                              tensor_b: np.zeros([1, 2, 2, 3])})

  def test_unequal_static_shape_along_first_dim_raises_exception(self):
    shape_a = tf.constant(np.zeros([4, 2, 2, 1]))
    shape_b = tf.constant(np.zeros([6, 2, 3, 1]))
    with self.assertRaisesRegexp(
        ValueError, 'Unequal first dimension'):
      shape_utils.assert_shape_equal_along_first_dimension(
          shape_utils.combined_static_and_dynamic_shape(shape_a),
          shape_utils.combined_static_and_dynamic_shape(shape_b))

  def test_equal_static_shape_along_first_dim_succeeds(self):
    # Only the leading dimension must match; trailing dims deliberately differ.
    shape_a = tf.constant(np.zeros([4, 2, 2, 1]))
    shape_b = tf.constant(np.zeros([4, 7, 2]))
    with self.test_session() as sess:
      op = shape_utils.assert_shape_equal_along_first_dimension(
          shape_utils.combined_static_and_dynamic_shape(shape_a),
          shape_utils.combined_static_and_dynamic_shape(shape_b))
      sess.run(op)

  def test_unequal_dynamic_shape_along_first_dim_raises_tf_assert(self):
    tensor_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    tensor_b = tf.placeholder(tf.float32, shape=[None, None, 3])
    op = shape_utils.assert_shape_equal_along_first_dimension(
        shape_utils.combined_static_and_dynamic_shape(tensor_a),
        shape_utils.combined_static_and_dynamic_shape(tensor_b))
    with self.test_session() as sess:
      with self.assertRaises(tf.errors.InvalidArgumentError):
        sess.run(op, feed_dict={tensor_a: np.zeros([1, 2, 2, 3]),
                                tensor_b: np.zeros([2, 4, 3])})

  def test_equal_dynamic_shape_along_first_dim_succeeds(self):
    tensor_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    tensor_b = tf.placeholder(tf.float32, shape=[None])
    op = shape_utils.assert_shape_equal_along_first_dimension(
        shape_utils.combined_static_and_dynamic_shape(tensor_a),
        shape_utils.combined_static_and_dynamic_shape(tensor_b))
    with self.test_session() as sess:
      sess.run(op, feed_dict={tensor_a: np.zeros([5, 2, 2, 3]),
                              tensor_b: np.zeros([5])})
if __name__ == '__main__':
tf.test.main()
"""A convenience wrapper around tf.test.TestCase to enable TPU tests."""
import tensorflow as tf
from tensorflow.contrib import tpu
flags = tf.app.flags
flags.DEFINE_bool('tpu_test', False, 'Whether to configure test for TPU.')
FLAGS = flags.FLAGS
class TestCase(tf.test.TestCase):
  """Extends tf.test.TestCase to optionally allow running tests on TPU.

  Tests call `execute(graph_fn, inputs)`; the `--tpu_test` flag selects
  between the TPU and CPU execution paths at run time.
  """

  def execute_tpu(self, graph_fn, inputs):
    """Constructs the graph, executes it on TPU and returns the result.

    Args:
      graph_fn: a callable that constructs the tensorflow graph to test. The
        arguments of this function should correspond to `inputs`.
      inputs: a list of numpy arrays to feed input to the computation graph.

    Returns:
      A list of numpy arrays or a scalar returned from executing the tensorflow
      graph.
    """
    with self.test_session(graph=tf.Graph()) as sess:
      # placeholder_with_default lets the same graph run without a feed_dict.
      placeholders = [tf.placeholder_with_default(v, v.shape) for v in inputs]
      tpu_computation = tpu.rewrite(graph_fn, placeholders)
      # TPU system must be initialized before, and shut down after, running
      # the rewritten computation.
      sess.run(tpu.initialize_system())
      sess.run([tf.global_variables_initializer(), tf.tables_initializer(),
                tf.local_variables_initializer()])
      materialized_results = sess.run(tpu_computation,
                                      feed_dict=dict(zip(placeholders, inputs)))
      sess.run(tpu.shutdown_system())
      # Unwrap single-output computations to a scalar/array for convenience.
      # NOTE(review): assumes sess.run returned a sequence (len() works);
      # confirm graph_fn never returns a bare scalar tensor.
      if len(materialized_results) == 1:
        materialized_results = materialized_results[0]
      return materialized_results

  def execute_cpu(self, graph_fn, inputs):
    """Constructs the graph, executes it on CPU and returns the result.

    Args:
      graph_fn: a callable that constructs the tensorflow graph to test. The
        arguments of this function should correspond to `inputs`.
      inputs: a list of numpy arrays to feed input to the computation graph.

    Returns:
      A list of numpy arrays or a scalar returned from executing the tensorflow
      graph.
    """
    with self.test_session(graph=tf.Graph()) as sess:
      placeholders = [tf.placeholder_with_default(v, v.shape) for v in inputs]
      results = graph_fn(*placeholders)
      sess.run([tf.global_variables_initializer(), tf.tables_initializer(),
                tf.local_variables_initializer()])
      materialized_results = sess.run(results, feed_dict=dict(zip(placeholders,
                                                                  inputs)))
      # Unwrap single-output computations, mirroring execute_tpu.
      if len(materialized_results) == 1:
        materialized_results = materialized_results[0]
      return materialized_results

  def execute(self, graph_fn, inputs):
    """Constructs the graph, creates a test session and returns the results.

    The graph is executed either on TPU or CPU based on the `tpu_test` flag.

    Args:
      graph_fn: a callable that constructs the tensorflow graph to test. The
        arguments of this function should correspond to `inputs`.
      inputs: a list of numpy arrays to feed input to the computation graph.

    Returns:
      A list of numpy arrays or a scalar returned from executing the tensorflow
      graph.
    """
    if FLAGS.tpu_test:
      return self.execute_tpu(graph_fn, inputs)
    else:
      return self.execute_cpu(graph_fn, inputs)
......@@ -46,12 +46,13 @@ class MockBoxPredictor(box_predictor.BoxPredictor):
super(MockBoxPredictor, self).__init__(is_training, num_classes)
def _predict(self, image_features, num_predictions_per_location):
image_feature = image_features[0]
combined_feature_shape = shape_utils.combined_static_and_dynamic_shape(
image_features)
image_feature)
batch_size = combined_feature_shape[0]
num_anchors = (combined_feature_shape[1] * combined_feature_shape[2])
code_size = 4
zero = tf.reduce_sum(0 * image_features)
zero = tf.reduce_sum(0 * image_feature)
box_encodings = zero + tf.zeros(
(batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros(
......
......@@ -96,7 +96,9 @@ def freeze_gradients_matching_regex(grads_and_vars, regex_list):
return kept_grads_and_vars
def get_variables_available_in_checkpoint(variables, checkpoint_path):
def get_variables_available_in_checkpoint(variables,
checkpoint_path,
include_global_step=True):
"""Returns the subset of variables available in the checkpoint.
Inspects given checkpoint and returns the subset of variables that are
......@@ -107,6 +109,8 @@ def get_variables_available_in_checkpoint(variables, checkpoint_path):
Args:
variables: a list or dictionary of variables to find in checkpoint.
checkpoint_path: path to the checkpoint to restore variables from.
include_global_step: whether to include `global_step` variable, if it
exists. Default True.
Returns:
A list or dictionary of variables.
......@@ -120,13 +124,20 @@ def get_variables_available_in_checkpoint(variables, checkpoint_path):
else:
raise ValueError('`variables` is expected to be a list or dict.')
ckpt_reader = tf.train.NewCheckpointReader(checkpoint_path)
ckpt_vars = ckpt_reader.get_variable_to_shape_map().keys()
ckpt_vars_to_shape_map = ckpt_reader.get_variable_to_shape_map()
if not include_global_step:
ckpt_vars_to_shape_map.pop(tf.GraphKeys.GLOBAL_STEP, None)
vars_in_ckpt = {}
for variable_name, variable in sorted(variable_names_map.items()):
if variable_name in ckpt_vars:
vars_in_ckpt[variable_name] = variable
if variable_name in ckpt_vars_to_shape_map:
if ckpt_vars_to_shape_map[variable_name] == variable.shape.as_list():
vars_in_ckpt[variable_name] = variable
else:
logging.warning('Variable [%s] is available in checkpoint, but has an '
'incompatible shape with model variable.',
variable_name)
else:
logging.warning('Variable [%s] not available in checkpoint',
logging.warning('Variable [%s] is not available in checkpoint',
variable_name)
if isinstance(variables, list):
return vars_in_ckpt.values()
......
......@@ -145,8 +145,11 @@ class GetVariablesAvailableInCheckpointTest(tf.test.TestCase):
def test_return_variables_available_in_checkpoint(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
weight_variable = tf.Variable(1.0, name='weights')
global_step = tf.train.get_or_create_global_step()
graph1_variables = [
tf.Variable(1.0, name='weights'),
weight_variable,
global_step
]
init_op = tf.global_variables_initializer()
saver = tf.train.Saver(graph1_variables)
......@@ -156,8 +159,8 @@ class GetVariablesAvailableInCheckpointTest(tf.test.TestCase):
graph2_variables = graph1_variables + [tf.Variable(1.0, name='biases')]
out_variables = variables_helper.get_variables_available_in_checkpoint(
graph2_variables, checkpoint_path)
self.assertItemsEqual(out_variables, graph1_variables)
graph2_variables, checkpoint_path, include_global_step=False)
self.assertItemsEqual(out_variables, [weight_variable])
def test_return_variables_available_an_checkpoint_with_dict_inputs(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
......@@ -180,6 +183,31 @@ class GetVariablesAvailableInCheckpointTest(tf.test.TestCase):
self.assertItemsEqual(out_variables.keys(), ['ckpt_weights'])
self.assertTrue(out_variables['ckpt_weights'].op.name == 'weights')
  def test_return_variables_with_correct_sizes(self):
    """Variables whose shape differs from the checkpointed shape are dropped."""
    checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
    bias_variable = tf.Variable(3.0, name='biases')
    global_step = tf.train.get_or_create_global_step()
    # Save 'weights' with a 2x2 shape.
    graph1_variables = [
        tf.Variable([[1.0, 2.0], [3.0, 4.0]], name='weights'),
        bias_variable,
        global_step
    ]
    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver(graph1_variables)
    with self.test_session() as sess:
      sess.run(init_op)
      saver.save(sess, checkpoint_path)

    # Request restore with an incompatible 'weights' shape; only the
    # shape-compatible variables should be returned.
    graph2_variables = [
        tf.Variable([1.0, 2.0], name='weights'),  # Note the new variable shape.
        bias_variable,
        global_step
    ]
    out_variables = variables_helper.get_variables_available_in_checkpoint(
        graph2_variables, checkpoint_path, include_global_step=True)
    self.assertItemsEqual(out_variables, [bias_variable, global_step])
if __name__ == '__main__':
tf.test.main()
......@@ -21,7 +21,9 @@ The functions do not return a value, instead they modify the image itself.
"""
import collections
import functools
import matplotlib.pyplot as plt
# Set headless-friendly backend.
import matplotlib; matplotlib.use('Agg') # pylint: disable=multiple-statements
import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top
import numpy as np
import PIL.Image as Image
import PIL.ImageColor as ImageColor
......@@ -30,6 +32,8 @@ import PIL.ImageFont as ImageFont
import six
import tensorflow as tf
from object_detection.core import standard_fields as fields
_TITLE_LEFT_MARGIN = 10
_TITLE_TOP_MARGIN = 10
......@@ -100,9 +104,12 @@ def draw_bounding_box_on_image_array(image,
use_normalized_coordinates=True):
"""Adds a bounding box to an image (numpy array).
Bounding box coordinates can be specified in either absolute (pixel) or
normalized coordinates by setting the use_normalized_coordinates argument.
Args:
image: a numpy array with shape [height, width, 3].
ymin: ymin of bounding box in normalized coordinates (same below).
ymin: ymin of bounding box.
xmin: xmin of bounding box.
ymax: ymax of bounding box.
xmax: xmax of bounding box.
......@@ -132,6 +139,9 @@ def draw_bounding_box_on_image(image,
use_normalized_coordinates=True):
"""Adds a bounding box to an image.
Bounding box coordinates can be specified in either absolute (pixel) or
normalized coordinates by setting the use_normalized_coordinates argument.
Each string in display_str_list is displayed on a separate line above the
bounding box in black text on a rectangle filled with the input 'color'.
If the top of the bounding box extends to the edge of the image, the strings
......@@ -255,14 +265,58 @@ def draw_bounding_boxes_on_image(image,
boxes[i, 3], color, thickness, display_str_list)
def _visualize_boxes(image, boxes, classes, scores, category_index, **kwargs):
  """Draws boxes and labels on `image`; per-image helper for tf.py_func."""
  return visualize_boxes_and_labels_on_image_array(
      image, boxes, classes, scores, category_index=category_index, **kwargs)
def _visualize_boxes_and_masks(image, boxes, classes, scores, masks,
                               category_index, **kwargs):
  """Draws boxes, labels and instance masks on `image` (tf.py_func helper)."""
  return visualize_boxes_and_labels_on_image_array(
      image,
      boxes,
      classes,
      scores,
      category_index=category_index,
      instance_masks=masks,
      **kwargs)
def _visualize_boxes_and_keypoints(image, boxes, classes, scores, keypoints,
                                   category_index, **kwargs):
  """Draws boxes, labels and keypoints on `image` (tf.py_func helper)."""
  return visualize_boxes_and_labels_on_image_array(
      image,
      boxes,
      classes,
      scores,
      category_index=category_index,
      keypoints=keypoints,
      **kwargs)
def _visualize_boxes_and_masks_and_keypoints(
    image, boxes, classes, scores, masks, keypoints, category_index, **kwargs):
  """Draws boxes, labels, masks and keypoints on `image` (tf.py_func helper)."""
  return visualize_boxes_and_labels_on_image_array(
      image,
      boxes,
      classes,
      scores,
      category_index=category_index,
      instance_masks=masks,
      keypoints=keypoints,
      **kwargs)
def draw_bounding_boxes_on_image_tensors(images,
boxes,
classes,
scores,
category_index,
instance_masks=None,
keypoints=None,
max_boxes_to_draw=20,
min_score_thresh=0.2):
"""Draws bounding boxes on batch of image tensors.
"""Draws bounding boxes, masks, and keypoints on batch of image tensors.
Args:
images: A 4D uint8 image tensor of shape [N, H, W, C].
......@@ -272,37 +326,123 @@ def draw_bounding_boxes_on_image_tensors(images,
scores: [N, max_detections] float32 tensor of detection scores.
category_index: a dict that maps integer ids to category dicts. e.g.
{1: {1: 'dog'}, 2: {2: 'cat'}, ...}
instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with
instance masks.
keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2]
with keypoints.
max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20.
min_score_thresh: Minimum score threshold for visualization. Default 0.2.
Returns:
4D image tensor of type uint8, with boxes drawn on top.
"""
visualize_boxes_fn = functools.partial(
visualize_boxes_and_labels_on_image_array,
category_index=category_index,
instance_masks=None,
keypoints=None,
use_normalized_coordinates=True,
max_boxes_to_draw=max_boxes_to_draw,
min_score_thresh=min_score_thresh,
agnostic_mode=False,
line_thickness=4)
visualization_keyword_args = {
'use_normalized_coordinates': True,
'max_boxes_to_draw': max_boxes_to_draw,
'min_score_thresh': min_score_thresh,
'agnostic_mode': False,
'line_thickness': 4
}
if instance_masks is not None and keypoints is None:
visualize_boxes_fn = functools.partial(
_visualize_boxes_and_masks,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores, instance_masks]
elif instance_masks is None and keypoints is not None:
visualize_boxes_fn = functools.partial(
_visualize_boxes_and_keypoints,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores, keypoints]
elif instance_masks is not None and keypoints is not None:
visualize_boxes_fn = functools.partial(
_visualize_boxes_and_masks_and_keypoints,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores, instance_masks, keypoints]
else:
visualize_boxes_fn = functools.partial(
_visualize_boxes,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores]
def draw_boxes(image_boxes_classes_scores):
def draw_boxes(image_and_detections):
"""Draws boxes on image."""
(image, boxes, classes, scores) = image_boxes_classes_scores
image_with_boxes = tf.py_func(visualize_boxes_fn,
[image, boxes, classes, scores], tf.uint8)
image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections,
tf.uint8)
return image_with_boxes
images = tf.map_fn(
draw_boxes, (images, boxes, classes, scores),
dtype=tf.uint8,
back_prop=False)
images = tf.map_fn(draw_boxes, elems, dtype=tf.uint8, back_prop=False)
return images
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2):
  """Renders detections and groundtruth side by side for a single example.

  Bounding boxes (and instance masks, when present in `eval_dict`) are
  visualized on both halves of the returned image tensor.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_single_example().
    category_index: A category index (dictionary) produced from a labelmap.
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.

  Returns:
    A [1, H, 2 * W, C] uint8 tensor: the left half shows detections, the
    right half shows groundtruth.
  """
  det_fields = fields.DetectionResultFields()
  gt_fields = fields.InputDataFields()

  def _batched(tensor):
    # Prepend a batch dimension of size 1.
    return tf.expand_dims(tensor, axis=0)

  detection_masks = None
  if det_fields.detection_masks in eval_dict:
    detection_masks = tf.cast(
        _batched(eval_dict[det_fields.detection_masks]), tf.uint8)
  detection_keypoints = None
  if det_fields.detection_keypoints in eval_dict:
    detection_keypoints = _batched(eval_dict[det_fields.detection_keypoints])
  groundtruth_masks = None
  if gt_fields.groundtruth_instance_masks in eval_dict:
    groundtruth_masks = tf.cast(
        _batched(eval_dict[gt_fields.groundtruth_instance_masks]), tf.uint8)

  detections_image = draw_bounding_boxes_on_image_tensors(
      eval_dict[gt_fields.original_image],
      _batched(eval_dict[det_fields.detection_boxes]),
      _batched(eval_dict[det_fields.detection_classes]),
      _batched(eval_dict[det_fields.detection_scores]),
      category_index,
      instance_masks=detection_masks,
      keypoints=detection_keypoints,
      max_boxes_to_draw=max_boxes_to_draw,
      min_score_thresh=min_score_thresh)
  # Groundtruth boxes carry no scores; use all-ones so every box is drawn.
  groundtruth_image = draw_bounding_boxes_on_image_tensors(
      eval_dict[gt_fields.original_image],
      _batched(eval_dict[gt_fields.groundtruth_boxes]),
      _batched(eval_dict[gt_fields.groundtruth_classes]),
      _batched(
          tf.ones_like(
              eval_dict[gt_fields.groundtruth_classes], dtype=tf.float32)),
      category_index,
      instance_masks=groundtruth_masks,
      keypoints=None,
      max_boxes_to_draw=None,
      min_score_thresh=0.0)
  return tf.concat([detections_image, groundtruth_image], axis=2)
def draw_keypoints_on_image_array(image,
keypoints,
color='red',
......@@ -352,7 +492,7 @@ def draw_keypoints_on_image(image,
outline=color, fill=color)
def draw_mask_on_image_array(image, mask, color='red', alpha=0.7):
def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
"""Draws mask on an image.
Args:
......@@ -360,7 +500,7 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.7):
mask: a uint8 numpy array of shape (img_height, img_height) with
values between either 0 or 1.
color: color to draw the keypoints with. Default is red.
alpha: transparency value between 0 and 1. (default: 0.7)
alpha: transparency value between 0 and 1. (default: 0.4)
Raises:
ValueError: On incorrect data type for image or masks.
......@@ -371,6 +511,9 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.7):
raise ValueError('`mask` not of type np.uint8')
if np.any(np.logical_and(mask != 1, mask != 0)):
raise ValueError('`mask` elements should be in [0, 1]')
if image.shape[:2] != mask.shape:
raise ValueError('The image has spatial dimensions %s but the mask has '
'dimensions %s' % (image.shape[:2], mask.shape))
rgb = ImageColor.getrgb(color)
pil_image = Image.fromarray(image)
......@@ -382,18 +525,23 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.7):
np.copyto(image, np.array(pil_image.convert('RGB')))
def visualize_boxes_and_labels_on_image_array(image,
boxes,
classes,
scores,
category_index,
instance_masks=None,
keypoints=None,
use_normalized_coordinates=False,
max_boxes_to_draw=20,
min_score_thresh=.5,
agnostic_mode=False,
line_thickness=4):
def visualize_boxes_and_labels_on_image_array(
image,
boxes,
classes,
scores,
category_index,
instance_masks=None,
instance_boundaries=None,
keypoints=None,
use_normalized_coordinates=False,
max_boxes_to_draw=20,
min_score_thresh=.5,
agnostic_mode=False,
line_thickness=4,
groundtruth_box_visualization_color='black',
skip_scores=False,
skip_labels=False):
"""Overlay labeled boxes on an image with formatted scores and label names.
This function groups boxes that correspond to the same location
......@@ -411,8 +559,10 @@ def visualize_boxes_and_labels_on_image_array(image,
boxes and plot all boxes as black with no classes or scores.
category_index: a dict containing category dictionaries (each holding
category index `id` and category name `name`) keyed by category indices.
instance_masks: a numpy array of shape [N, image_height, image_width], can
be None
instance_masks: a numpy array of shape [N, image_height, image_width] with
values ranging between 0 and 1, can be None.
instance_boundaries: a numpy array of shape [N, image_height, image_width]
with values ranging between 0 and 1, can be None.
keypoints: a numpy array of shape [N, num_keypoints, 2], can
be None
use_normalized_coordinates: whether boxes is to be interpreted as
......@@ -424,6 +574,10 @@ def visualize_boxes_and_labels_on_image_array(image,
class-agnostic mode or not. This mode will display scores but ignore
classes.
line_thickness: integer (default: 4) controlling line width of the boxes.
groundtruth_box_visualization_color: box color for visualizing groundtruth
boxes
skip_scores: whether to skip score when drawing a single detection
skip_labels: whether to skip label when drawing a single detection
Returns:
uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
......@@ -433,6 +587,7 @@ def visualize_boxes_and_labels_on_image_array(image,
box_to_display_str_map = collections.defaultdict(list)
box_to_color_map = collections.defaultdict(str)
box_to_instance_masks_map = {}
box_to_instance_boundaries_map = {}
box_to_keypoints_map = collections.defaultdict(list)
if not max_boxes_to_draw:
max_boxes_to_draw = boxes.shape[0]
......@@ -441,21 +596,26 @@ def visualize_boxes_and_labels_on_image_array(image,
box = tuple(boxes[i].tolist())
if instance_masks is not None:
box_to_instance_masks_map[box] = instance_masks[i]
if instance_boundaries is not None:
box_to_instance_boundaries_map[box] = instance_boundaries[i]
if keypoints is not None:
box_to_keypoints_map[box].extend(keypoints[i])
if scores is None:
box_to_color_map[box] = 'black'
box_to_color_map[box] = groundtruth_box_visualization_color
else:
if not agnostic_mode:
if classes[i] in category_index.keys():
class_name = category_index[classes[i]]['name']
display_str = ''
if not skip_labels:
if not agnostic_mode:
if classes[i] in category_index.keys():
class_name = category_index[classes[i]]['name']
else:
class_name = 'N/A'
display_str = str(class_name)
if not skip_scores:
if not display_str:
display_str = '{}%'.format(int(100*scores[i]))
else:
class_name = 'N/A'
display_str = '{}: {}%'.format(
class_name,
int(100*scores[i]))
else:
display_str = 'score: {}%'.format(int(100 * scores[i]))
display_str = '{}: {}%'.format(display_str, int(100*scores[i]))
box_to_display_str_map[box].append(display_str)
if agnostic_mode:
box_to_color_map[box] = 'DarkOrange'
......@@ -472,6 +632,13 @@ def visualize_boxes_and_labels_on_image_array(image,
box_to_instance_masks_map[box],
color=color
)
if instance_boundaries is not None:
draw_mask_on_image_array(
image,
box_to_instance_boundaries_map[box],
color='red',
alpha=1.0
)
draw_bounding_box_on_image_array(
image,
ymin,
......@@ -518,7 +685,7 @@ def add_cdf_image_summary(values, name):
fig.canvas.draw()
width, height = fig.get_size_inches() * fig.get_dpi()
image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape(
1, height, width, 3)
1, int(height), int(width), 3)
return image
cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8)
tf.summary.image(name, cdf_plot)
......@@ -145,7 +145,7 @@ class VisualizationUtilsTest(tf.test.TestCase):
for i in range(images_with_boxes_np.shape[0]):
img_name = 'image_' + str(i) + '.png'
output_file = os.path.join(self.get_temp_dir(), img_name)
print('Writing output image %d to %s' % (i, output_file))
print 'Writing output image %d to %s' % (i, output_file)
image_pil = Image.fromarray(images_with_boxes_np[i, ...])
image_pil.save(output_file)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment