"docs/vscode:/vscode.git/clone" did not exist on "acf77e3964d9c535fb7fcdcd708d37bad0bd32fe"
Commit a4944a57 authored by derekjchow, committed by Sergio Guadarrama

Add Tensorflow Object Detection API. (#1561)

For details see our paper:
"Speed/accuracy trade-offs for modern convolutional object detectors."
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I,
Wojna Z, Song Y, Guadarrama S, Murphy K, CVPR 2017
https://arxiv.org/abs/1611.10012
parent 60c3ed2e
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for region_similarity_calculator."""
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import region_similarity_calculator
class RegionSimilarityCalculatorTest(tf.test.TestCase):
def test_get_correct_pairwise_similarity_based_on_iou(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
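    # Worked example for exp_output[0][0] (boxes are [ymin, xmin, ymax, xmax]):
    # boxes1[0] = [4, 3, 7, 5] has area 3 * 2 = 6 and boxes2[0] = [3, 4, 6, 8]
    # has area 3 * 4 = 12; their intersection is 2 * 1 = 2, so the union is
    # 6 + 12 - 2 = 16 and IOU = 2 / 16.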
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
iou_similarity_calculator = region_similarity_calculator.IouSimilarity()
iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2)
with self.test_session() as sess:
iou_output = sess.run(iou_similarity)
self.assertAllClose(iou_output, exp_output)
def test_get_correct_pairwise_similarity_based_on_squared_distances(self):
corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
[1.0, 1.0, 0.0, 2.0]])
corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
[-4.0, 0.0, 0.0, 3.0],
[0.0, 0.0, 0.0, 0.0]])
exp_output = [[-26, -25, 0], [-18, -27, -6]]
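    # Worked example for exp_output[0][0]: the negative squared distance
    # between corners1[0] = [0, 0, 0, 0] and corners2[0] = [3, 4, 1, 0] is
    # -(3**2 + 4**2 + 1**2 + 0**2) = -26.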
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
dist_similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
dist_similarity = dist_similarity_calc.compare(boxes1, boxes2)
with self.test_session() as sess:
dist_output = sess.run(dist_similarity)
self.assertAllClose(dist_output, exp_output)
def test_get_correct_pairwise_similarity_based_on_ioa(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
[1.0 / 12.0, 0.0, 5.0 / 400.0]]
exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
[0, 0],
[6.0 / 6.0, 5.0 / 5.0]]
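    # IOA divides the intersection by the area of the second argument's boxes,
    # so it is not symmetric: compare(boxes1, boxes2)[0][0] = 2 / 12 (area of
    # boxes2[0]), while compare(boxes2, boxes1)[0][0] = 2 / 6 (area of
    # boxes1[0]).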
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
ioa_similarity_calculator = region_similarity_calculator.IoaSimilarity()
ioa_similarity_1 = ioa_similarity_calculator.compare(boxes1, boxes2)
ioa_similarity_2 = ioa_similarity_calculator.compare(boxes2, boxes1)
with self.test_session() as sess:
iou_output_1, iou_output_2 = sess.run(
[ioa_similarity_1, ioa_similarity_2])
self.assertAllClose(iou_output_1, exp_output_1)
self.assertAllClose(iou_output_2, exp_output_2)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains classes specifying naming conventions used for object detection.
Specifies:
InputDataFields: standard fields used by reader/preprocessor/batcher.
  BoxListFields: standard fields used by BoxList.
TfExampleFields: standard fields for tf-example data format (go/tf-example).
"""
class InputDataFields(object):
"""Names for the input tensors.
  Holds the standard data field names to use for identifying input tensors.
  These names should be used by the decoder to identify keys for the returned
  tensor_dict containing input tensors, and by the model to identify the
  tensors it needs.
Attributes:
image: image.
original_image: image in the original input size.
key: unique key corresponding to image.
source_id: source of the original image.
filename: original filename of the dataset (without common path).
groundtruth_image_classes: image-level class labels.
groundtruth_boxes: coordinates of the ground truth boxes in the image.
groundtruth_classes: box-level class labels.
groundtruth_label_types: box-level label types (e.g. explicit negative).
groundtruth_is_crowd: is the groundtruth a single object or a crowd.
groundtruth_area: area of a groundtruth segment.
    groundtruth_difficult: is a `difficult` object.
proposal_boxes: coordinates of object proposal boxes.
proposal_objectness: objectness score of each proposal.
groundtruth_instance_masks: ground truth instance masks.
groundtruth_instance_classes: instance mask-level class labels.
groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_label_scores: groundtruth label scores.
"""
image = 'image'
original_image = 'original_image'
key = 'key'
source_id = 'source_id'
filename = 'filename'
groundtruth_image_classes = 'groundtruth_image_classes'
groundtruth_boxes = 'groundtruth_boxes'
groundtruth_classes = 'groundtruth_classes'
groundtruth_label_types = 'groundtruth_label_types'
groundtruth_is_crowd = 'groundtruth_is_crowd'
groundtruth_area = 'groundtruth_area'
groundtruth_difficult = 'groundtruth_difficult'
proposal_boxes = 'proposal_boxes'
proposal_objectness = 'proposal_objectness'
groundtruth_instance_masks = 'groundtruth_instance_masks'
groundtruth_instance_classes = 'groundtruth_instance_classes'
groundtruth_keypoints = 'groundtruth_keypoints'
groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
groundtruth_label_scores = 'groundtruth_label_scores'
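

# Illustrative sketch (not part of this module): a decoder would typically
# return a tensor_dict keyed by these constants, e.g.
#
#   tensor_dict = {
#       InputDataFields.image: image_tensor,
#       InputDataFields.groundtruth_boxes: boxes_tensor,
#       InputDataFields.groundtruth_classes: classes_tensor,
#   }
#
# so that downstream code can look tensors up by field name rather than by
# raw string literals.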
class BoxListFields(object):
"""Naming conventions for BoxLists.
Attributes:
boxes: bounding box coordinates.
classes: classes per bounding box.
scores: scores per bounding box.
weights: sample weights per bounding box.
objectness: objectness score per bounding box.
masks: masks per bounding box.
keypoints: keypoints per bounding box.
keypoint_heatmaps: keypoint heatmaps per bounding box.
"""
boxes = 'boxes'
classes = 'classes'
scores = 'scores'
weights = 'weights'
objectness = 'objectness'
masks = 'masks'
keypoints = 'keypoints'
keypoint_heatmaps = 'keypoint_heatmaps'
class TfExampleFields(object):
"""TF-example proto feature names for object detection.
Holds the standard feature names to load from an Example proto for object
detection.
Attributes:
image_encoded: JPEG encoded string
image_format: image format, e.g. "JPEG"
filename: filename
channels: number of channels of image
colorspace: colorspace, e.g. "RGB"
height: height of image in pixels, e.g. 462
width: width of image in pixels, e.g. 581
source_id: original source of the image
object_class_text: labels in text format, e.g. ["person", "cat"]
    object_class_label: labels in numbers, e.g. [16, 8]
object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70
object_view: viewpoint of object, e.g. ["frontal", "left"]
object_truncated: is object truncated, e.g. [true, false]
object_occluded: is object occluded, e.g. [true, false]
object_difficult: is object difficult, e.g. [true, false]
object_is_crowd: is the object a single object or a crowd
object_segment_area: the area of the segment.
instance_masks: instance segmentation masks.
instance_classes: Classes for each instance segmentation mask.
"""
image_encoded = 'image/encoded'
image_format = 'image/format' # format is reserved keyword
filename = 'image/filename'
channels = 'image/channels'
colorspace = 'image/colorspace'
height = 'image/height'
width = 'image/width'
source_id = 'image/source_id'
object_class_text = 'image/object/class/text'
object_class_label = 'image/object/class/label'
object_bbox_ymin = 'image/object/bbox/ymin'
object_bbox_xmin = 'image/object/bbox/xmin'
object_bbox_ymax = 'image/object/bbox/ymax'
object_bbox_xmax = 'image/object/bbox/xmax'
object_view = 'image/object/view'
object_truncated = 'image/object/truncated'
object_occluded = 'image/object/occluded'
object_difficult = 'image/object/difficult'
object_is_crowd = 'image/object/is_crowd'
object_segment_area = 'image/object/segment/area'
instance_masks = 'image/segmentation/object'
instance_classes = 'image/segmentation/object/class'
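

# A minimal, self-contained sketch (not part of this module) showing how
# TfExampleFields supplies the canonical feature keys when building a
# tf.train.Example by hand; the feature values below are made up for
# illustration.
if __name__ == '__main__':
  import tensorflow as tf
  example = tf.train.Example(features=tf.train.Features(feature={
      TfExampleFields.height: tf.train.Feature(
          int64_list=tf.train.Int64List(value=[462])),
      TfExampleFields.width: tf.train.Feature(
          int64_list=tf.train.Int64List(value=[581])),
      TfExampleFields.object_class_text: tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[b'person'])),
  }))
  print(example)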
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base target assigner module.
The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
groundtruth detections (bounding boxes), to assign classification and regression
targets to each anchor as well as weights to each anchor (specifying, e.g.,
which anchors should not contribute to training loss).
It assigns classification/regression targets by performing the following steps:
1) Computing pairwise similarity between anchors and groundtruth boxes using a
   provided RegionSimilarityCalculator
2) Computing a matching based on the similarity matrix using a provided Matcher
3) Assigning regression targets based on the matching and a provided BoxCoder
4) Assigning classification targets based on the matching and groundtruth labels
Note that TargetAssigners only operate on detections from a single
image at a time, so any logic for applying a TargetAssigner to multiple
images must be handled externally.
"""
import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_coder as bcoder
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
class TargetAssigner(object):
"""Target assigner to compute classification and regression targets."""
def __init__(self, similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=1.0,
unmatched_cls_target=None):
"""Construct Multibox Target Assigner.
Args:
similarity_calc: a RegionSimilarityCalculator
matcher: an object_detection.core.Matcher used to match groundtruth to
anchors.
box_coder: an object_detection.core.BoxCoder used to encode matching
groundtruth boxes with respect to anchors.
      positive_class_weight: classification weight to be associated with
        positive anchors (default: 1.0)
      negative_class_weight: classification weight to be associated with
        negative anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
compatible with the groundtruth labels that are passed to the "assign"
function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
If set to None, unmatched_cls_target is set to be [0] for each anchor.
Raises:
ValueError: if similarity_calc is not a RegionSimilarityCalculator or
if matcher is not a Matcher or if box_coder is not a BoxCoder
"""
if not isinstance(similarity_calc, sim_calc.RegionSimilarityCalculator):
raise ValueError('similarity_calc must be a RegionSimilarityCalculator')
if not isinstance(matcher, mat.Matcher):
raise ValueError('matcher must be a Matcher')
if not isinstance(box_coder, bcoder.BoxCoder):
raise ValueError('box_coder must be a BoxCoder')
self._similarity_calc = similarity_calc
self._matcher = matcher
self._box_coder = box_coder
self._positive_class_weight = positive_class_weight
self._negative_class_weight = negative_class_weight
if unmatched_cls_target is None:
self._unmatched_cls_target = tf.constant([0], tf.float32)
else:
self._unmatched_cls_target = unmatched_cls_target
@property
def box_coder(self):
return self._box_coder
def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
**params):
"""Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors
to groundtruth_boxes and assign classification and regression targets to
each anchor as well as weights based on the resulting match (specifying,
e.g., which anchors should not contribute to training loss).
Anchors that are not matched to anything are given a classification target
of self._unmatched_cls_target which can be specified via the constructor.
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth boxes
groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
with labels for each of the ground_truth boxes. The subshape
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
to None, groundtruth_labels assumes a binary problem where all
ground_truth boxes get a positive label (of 1).
**params: Additional keyword arguments for specific implementations of
the Matcher.
Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has shape [num_gt_boxes, d_1, d_2, ... d_k].
cls_weights: a float32 tensor with shape [num_anchors]
reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
reg_weights: a float32 tensor with shape [num_anchors]
match: a matcher.Match object encoding the match between anchors and
groundtruth boxes, with rows corresponding to groundtruth boxes
and columns corresponding to anchors.
Raises:
ValueError: if anchors or groundtruth_boxes are not of type
box_list.BoxList
"""
    if not isinstance(anchors, box_list.BoxList):
      raise ValueError('anchors must be a BoxList')
    if not isinstance(groundtruth_boxes, box_list.BoxList):
      raise ValueError('groundtruth_boxes must be a BoxList')
if groundtruth_labels is None:
groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
0))
groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
shape_assert = tf.assert_equal(tf.shape(groundtruth_labels)[1:],
tf.shape(self._unmatched_cls_target))
with tf.control_dependencies([shape_assert]):
match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
anchors)
match = self._matcher.match(match_quality_matrix, **params)
reg_targets = self._create_regression_targets(anchors,
groundtruth_boxes,
match)
cls_targets = self._create_classification_targets(groundtruth_labels,
match)
reg_weights = self._create_regression_weights(match)
cls_weights = self._create_classification_weights(
match, self._positive_class_weight, self._negative_class_weight)
num_anchors = anchors.num_boxes_static()
if num_anchors is not None:
reg_targets = self._reset_target_shape(reg_targets, num_anchors)
cls_targets = self._reset_target_shape(cls_targets, num_anchors)
reg_weights = self._reset_target_shape(reg_weights, num_anchors)
cls_weights = self._reset_target_shape(cls_weights, num_anchors)
return cls_targets, cls_weights, reg_targets, reg_weights, match
def _reset_target_shape(self, target, num_anchors):
"""Sets the static shape of the target.
Args:
target: the target tensor. Its first dimension will be overwritten.
num_anchors: the number of anchors, which is used to override the target's
first dimension.
Returns:
A tensor with the shape info filled in.
"""
target_shape = target.get_shape().as_list()
target_shape[0] = num_anchors
target.set_shape(target_shape)
return target
def _create_regression_targets(self, anchors, groundtruth_boxes, match):
"""Returns a regression target for each anchor.
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth_boxes
match: a matcher.Match object
Returns:
reg_targets: a float32 tensor with shape [N, box_code_dimension]
"""
matched_anchor_indices = match.matched_column_indices()
unmatched_ignored_anchor_indices = (match.
unmatched_or_ignored_column_indices())
matched_gt_indices = match.matched_row_indices()
matched_anchors = box_list_ops.gather(anchors,
matched_anchor_indices)
matched_gt_boxes = box_list_ops.gather(groundtruth_boxes,
matched_gt_indices)
matched_reg_targets = self._box_coder.encode(matched_gt_boxes,
matched_anchors)
unmatched_ignored_reg_targets = tf.tile(
self._default_regression_target(),
tf.stack([tf.size(unmatched_ignored_anchor_indices), 1]))
reg_targets = tf.dynamic_stitch(
[matched_anchor_indices, unmatched_ignored_anchor_indices],
[matched_reg_targets, unmatched_ignored_reg_targets])
# TODO: summarize the number of matches on average.
return reg_targets
def _default_regression_target(self):
"""Returns the default target for anchors to regress to.
Default regression targets are set to zero (though in
this implementation what these targets are set to should
not matter as the regression weight of any box set to
regress to the default target is zero).
Returns:
default_target: a float32 tensor with shape [1, box_code_dimension]
"""
return tf.constant([self._box_coder.code_size*[0]], tf.float32)
def _create_classification_targets(self, groundtruth_labels, match):
"""Create classification targets for each anchor.
    Assign a classification target for each anchor to the matching groundtruth
    label provided by match. Anchors that are not matched to anything are
    given the target self._unmatched_cls_target.
Args:
groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
with labels for each of the ground_truth boxes. The subshape
[d_1, ... d_k] can be empty (corresponding to scalar labels).
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has shape [num_gt_boxes, d_1, d_2, ... d_k].
"""
matched_anchor_indices = match.matched_column_indices()
unmatched_ignored_anchor_indices = (match.
unmatched_or_ignored_column_indices())
matched_gt_indices = match.matched_row_indices()
matched_cls_targets = tf.gather(groundtruth_labels, matched_gt_indices)
ones = self._unmatched_cls_target.shape.ndims * [1]
unmatched_ignored_cls_targets = tf.tile(
tf.expand_dims(self._unmatched_cls_target, 0),
tf.stack([tf.size(unmatched_ignored_anchor_indices)] + ones))
cls_targets = tf.dynamic_stitch(
[matched_anchor_indices, unmatched_ignored_anchor_indices],
[matched_cls_targets, unmatched_ignored_cls_targets])
return cls_targets
def _create_regression_weights(self, match):
"""Set regression weight for each anchor.
Only positive anchors are set to contribute to the regression loss, so this
method returns a weight of 1 for every positive anchor and 0 for every
negative anchor.
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
Returns:
reg_weights: a float32 tensor with shape [num_anchors] representing
regression weights
"""
reg_weights = tf.cast(match.matched_column_indicator(), tf.float32)
return reg_weights
def _create_classification_weights(self,
match,
positive_class_weight=1.0,
negative_class_weight=1.0):
"""Create classification weights for each anchor.
Positive (matched) anchors are associated with a weight of
positive_class_weight and negative (unmatched) anchors are associated with
a weight of negative_class_weight. When anchors are ignored, weights are set
to zero. By default, both positive/negative weights are set to 1.0,
but they can be adjusted to handle class imbalance (which is almost always
the case in object detection).
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
      positive_class_weight: weight to be associated with positive anchors
      negative_class_weight: weight to be associated with negative anchors
Returns:
cls_weights: a float32 tensor with shape [num_anchors] representing
classification weights.
"""
matched_indicator = tf.cast(match.matched_column_indicator(), tf.float32)
ignore_indicator = tf.cast(match.ignored_column_indicator(), tf.float32)
unmatched_indicator = 1.0 - matched_indicator - ignore_indicator
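    # Ignored anchors have both the matched and unmatched indicators equal to
    # zero, so the weighted sum below gives them a classification weight of
    # zero.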
cls_weights = (positive_class_weight * matched_indicator
+ negative_class_weight * unmatched_indicator)
return cls_weights
def get_box_coder(self):
"""Get BoxCoder of this TargetAssigner.
Returns:
BoxCoder: BoxCoder object.
"""
return self._box_coder
# TODO: This method pulls in all the implementation dependencies into core.
# Therefore it's best to have this factory method outside of core.
def create_target_assigner(reference, stage=None,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=None):
"""Factory function for creating standard target assigners.
Args:
reference: string referencing the type of TargetAssigner.
stage: string denoting stage: {proposal, detection}.
    positive_class_weight: classification weight to be associated with
      positive anchors (default: 1.0)
    negative_class_weight: classification weight to be associated with
      negative anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
      compatible with the groundtruth labels that are passed to the "assign"
function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
If set to None, unmatched_cls_target is set to be 0 for each anchor.
Returns:
TargetAssigner: desired target assigner.
Raises:
ValueError: if combination reference+stage is invalid.
"""
if reference == 'Multibox' and stage == 'proposal':
similarity_calc = sim_calc.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
elif reference == 'FasterRCNN' and stage == 'proposal':
similarity_calc = sim_calc.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.7,
unmatched_threshold=0.3,
force_match_for_each_row=True)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=[10.0, 10.0, 5.0, 5.0])
elif reference == 'FasterRCNN' and stage == 'detection':
similarity_calc = sim_calc.IouSimilarity()
# Uses all proposals with IOU < 0.5 as candidate negatives.
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
negatives_lower_than_unmatched=True)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=[10.0, 10.0, 5.0, 5.0])
elif reference == 'FastRCNN':
similarity_calc = sim_calc.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.1,
force_match_for_each_row=False,
negatives_lower_than_unmatched=False)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
else:
raise ValueError('No valid combination of reference and stage.')
return TargetAssigner(similarity_calc, matcher, box_coder,
positive_class_weight=positive_class_weight,
negative_class_weight=negative_class_weight,
unmatched_cls_target=unmatched_cls_target)
def batch_assign_targets(target_assigner,
anchors_batch,
gt_box_batch,
gt_class_targets_batch):
"""Batched assignment of classification and regression targets.
Args:
target_assigner: a target assigner.
anchors_batch: BoxList representing N box anchors or list of BoxList objects
with length batch_size representing anchor sets.
gt_box_batch: a list of BoxList objects with length batch_size
representing groundtruth boxes for each image in the batch
gt_class_targets_batch: a list of tensors with length batch_size, where
each tensor has shape [num_gt_boxes_i, classification_target_size] and
num_gt_boxes_i is the number of boxes in the ith boxlist of
gt_box_batch.
Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors,
num_classes],
batch_cls_weights: a tensor with shape [batch_size, num_anchors],
batch_reg_targets: a tensor with shape [batch_size, num_anchors,
box_code_dimension]
batch_reg_weights: a tensor with shape [batch_size, num_anchors],
match_list: a list of matcher.Match objects encoding the match between
anchors and groundtruth boxes for each image of the batch,
with rows of the Match objects corresponding to groundtruth boxes
and columns corresponding to anchors.
Raises:
    ValueError: if input list lengths are inconsistent, i.e., if it does not
      hold that batch_size == len(gt_box_batch) == len(gt_class_targets_batch)
      and batch_size == len(anchors_batch) (unless anchors_batch is a single
      BoxList).
"""
if not isinstance(anchors_batch, list):
anchors_batch = len(gt_box_batch) * [anchors_batch]
if not all(
isinstance(anchors, box_list.BoxList) for anchors in anchors_batch):
raise ValueError('anchors_batch must be a BoxList or list of BoxLists.')
if not (len(anchors_batch)
== len(gt_box_batch)
== len(gt_class_targets_batch)):
raise ValueError('batch size incompatible with lengths of anchors_batch, '
'gt_box_batch and gt_class_targets_batch.')
cls_targets_list = []
cls_weights_list = []
reg_targets_list = []
reg_weights_list = []
match_list = []
for anchors, gt_boxes, gt_class_targets in zip(
anchors_batch, gt_box_batch, gt_class_targets_batch):
(cls_targets, cls_weights, reg_targets,
reg_weights, match) = target_assigner.assign(
anchors, gt_boxes, gt_class_targets)
cls_targets_list.append(cls_targets)
cls_weights_list.append(cls_weights)
reg_targets_list.append(reg_targets)
reg_weights_list.append(reg_weights)
match_list.append(match)
batch_cls_targets = tf.stack(cls_targets_list)
batch_cls_weights = tf.stack(cls_weights_list)
batch_reg_targets = tf.stack(reg_targets_list)
batch_reg_weights = tf.stack(reg_weights_list)
return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
batch_reg_weights, match_list)
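

# Illustrative usage sketch (not part of this module); the anchors, the
# groundtruth box and the 'stddev' field mirror the unit tests, but the
# numbers here are made up.
if __name__ == '__main__':
  example_anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5],
                                                  [0.5, 0.5, 1.0, 1.0]]))
  example_anchors.add_field('stddev', tf.constant(2 * [4 * [0.1]]))
  example_groundtruth = box_list.BoxList(tf.constant([[0.1, 0.1, 0.6, 0.6]]))
  example_assigner = create_target_assigner('Multibox', stage='proposal')
  (cls_targets, cls_weights, reg_targets,
   reg_weights, match) = example_assigner.assign(example_anchors,
                                                 example_groundtruth)
  with tf.Session() as sess:
    print(sess.run([cls_targets, reg_weights]))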
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.target_assigner."""
import numpy as np
import tensorflow as tf
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_list
from object_detection.core import region_similarity_calculator
from object_detection.core import target_assigner as targetassigner
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
class TargetAssignerTest(tf.test.TestCase):
def test_assign_agnostic(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0]])
prior_stddevs = tf.constant(3 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.9, 0.9]]
boxes = box_list.BoxList(tf.constant(box_corners))
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1]
result = target_assigner.assign(priors, boxes, num_valid_rows=2)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_with_ignored_matches(self):
    # Note: this test is very similar to the one above. The third anchor
    # matches with an IOU of 0.35, which is between the matched and unmatched
    # thresholds. As above, the expected classification targets are
    # [[1], [1], [0]]. Unlike above, the third anchor is ignored, so the
    # expected classification weights are [1, 1, 0].
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.3)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0.0, 0.5, .9, 1.0]])
prior_stddevs = tf.constant(3 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]]
boxes = box_list.BoxList(tf.constant(box_corners))
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 0]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1]
result = target_assigner.assign(priors, boxes)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_multiclass(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], tf.float32)
exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0]]
exp_cls_weights = [1, 1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0],
[0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_multiclass_unequal_class_weights(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=0.5,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], tf.float32)
exp_cls_weights = [1, 1, .5, 1]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(_, cls_weights, _, _, _) = result
with self.test_session() as sess:
cls_weights_out = sess.run(cls_weights)
self.assertAllClose(cls_weights_out, exp_cls_weights)
def test_assign_multidimensional_class_targets(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[[0, 1], [1, 0]],
[[1, 0], [0, 1]],
[[0, 1], [1, .5]]], tf.float32)
exp_cls_targets = [[[0, 1], [1, 0]],
[[1, 0], [0, 1]],
[[0, 0], [0, 0]],
[[0, 1], [1, .5]]]
exp_cls_weights = [1, 1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0],
[0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_empty_groundtruth(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners_expanded = tf.constant([[0.0, 0.0, 0.0, 0.0]])
box_corners = tf.slice(box_corners_expanded, [0, 0], [0, 4])
boxes = box_list.BoxList(box_corners)
groundtruth_labels_expanded = tf.constant([[0, 0, 0]], tf.float32)
groundtruth_labels = tf.slice(groundtruth_labels_expanded, [0, 0], [0, 3])
exp_cls_targets = [[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]]
exp_cls_weights = [1, 1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]
exp_reg_weights = [0, 0, 0, 0]
exp_matching_anchors = []
result = target_assigner.assign(priors, boxes, groundtruth_labels)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_raises_error_on_invalid_groundtruth_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]])
prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
with self.assertRaises(ValueError):
target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
class BatchTargetAssignerTest(tf.test.TestCase):
def _get_agnostic_target_assigner(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=None)
def _get_multi_class_target_assigner(self, num_classes):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
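    # [1] + num_classes * [0] below is a one-hot target for an implicit
    # background class, so unmatched anchors are labeled as background.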
unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32)
return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=unmatched_cls_target)
def _get_multi_dimensional_target_assigner(self, target_dimensions):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant(np.zeros(target_dimensions),
tf.float32)
return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=unmatched_cls_target)
def test_batch_assign_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
gt_class_targets = [None, None]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[1], [0], [0], [0]],
[[0], [1], [1], [0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
agnostic_target_assigner = self._get_agnostic_target_assigner()
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
agnostic_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_multiclass_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
class_targets1 = tf.constant([[0, 1, 0, 0]], tf.float32)
class_targets2 = tf.constant([[0, 0, 0, 1],
[0, 0, 1, 0]], tf.float32)
gt_class_targets = [class_targets1, class_targets2]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 0, 0, 0],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[0, 1, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0]],
[[1, 0, 0, 0],
[0, 0, 0, 1],
[0, 0, 1, 0],
[1, 0, 0, 0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_multidimensional_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
class_targets1 = tf.constant([[[0, 1, 1],
[1, 1, 0]]], tf.float32)
class_targets2 = tf.constant([[[0, 1, 1],
[1, 1, 0]],
[[0, 0, 1],
[0, 0, 1]]], tf.float32)
gt_class_targets = [class_targets1, class_targets2]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 0, 0, 0],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[[0., 1., 1.],
[1., 1., 0.]],
[[0., 0., 0.],
[0., 0., 0.]],
[[0., 0., 0.],
[0., 0., 0.]],
[[0., 0., 0.],
[0., 0., 0.]]],
[[[0., 0., 0.],
[0., 0., 0.]],
[[0., 1., 1.],
[1., 1., 0.]],
[[0., 0., 1.],
[0., 0., 1.]],
[[0., 0., 0.],
[0., 0., 0.]]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
multiclass_target_assigner = self._get_multi_dimensional_target_assigner(
target_dimensions=(2, 3))
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_empty_groundtruth(self):
box_coords_expanded = tf.zeros((1, 4), tf.float32)
box_coords = tf.slice(box_coords_expanded, [0, 0], [0, 4])
box_list1 = box_list.BoxList(box_coords)
gt_box_batch = [box_list1]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, 0, 0],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1]]
exp_cls_targets = [[[1, 0, 0, 0],
[1, 0, 0, 0]]]
exp_reg_weights = [[0, 0]]
exp_match_0 = []
num_classes = 3
pad = 1
gt_class_targets = tf.zeros((0, num_classes + pad))
gt_class_targets_batch = [gt_class_targets]
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors,
gt_box_batch, gt_class_targets_batch)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 1)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
class CreateTargetAssignerTest(tf.test.TestCase):
def test_create_target_assigner(self):
"""Tests that named constructor gives working target assigners.
TODO: Make this test more general.
"""
corners = [[0.0, 0.0, 1.0, 1.0]]
groundtruth = box_list.BoxList(tf.constant(corners))
priors = box_list.BoxList(tf.constant(corners))
prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]])
priors.add_field('stddev', prior_stddevs)
multibox_ta = (targetassigner
.create_target_assigner('Multibox', stage='proposal'))
multibox_ta.assign(priors, groundtruth)
# No tests on output, as that may vary arbitrarily as new target assigners
# are added. As long as it is constructed correctly and runs without errors,
# tests on the individual assigners cover correctness of the assignments.
anchors = box_list.BoxList(tf.constant(corners))
faster_rcnn_proposals_ta = (targetassigner
.create_target_assigner('FasterRCNN',
stage='proposal'))
faster_rcnn_proposals_ta.assign(anchors, groundtruth)
fast_rcnn_ta = (targetassigner
.create_target_assigner('FastRCNN'))
fast_rcnn_ta.assign(anchors, groundtruth)
faster_rcnn_detection_ta = (targetassigner
.create_target_assigner('FasterRCNN',
stage='detection'))
faster_rcnn_detection_ta.assign(anchors, groundtruth)
with self.assertRaises(ValueError):
targetassigner.create_target_assigner('InvalidDetector',
stage='invalid_stage')
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw PASCAL dataset to TFRecord for object_detection.
Example usage:
./create_pascal_tf_record --data_dir=/home/user/VOCdevkit \
--year=VOC2012 \
--output_path=/home/user/pascal.record
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import logging
import os
from lxml import etree
import PIL.Image
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
flags.DEFINE_enum('set', 'train', ['train', 'val', 'trainval', 'test'],
'Convert training set, validation set or merged set.')
flags.DEFINE_string('annotations_dir', 'Annotations',
'(Relative) path to annotations directory.')
flags.DEFINE_enum('year', 'VOC2007', ['VOC2007', 'VOC2012', 'merged'],
'Desired challenge year.')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt',
'Path to label map proto')
flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
'difficult instances')
FLAGS = flags.FLAGS
def dict_to_tf_example(data,
dataset_directory,
label_map_dict,
ignore_difficult_instances=False,
image_subdirectory='JPEGImages'):
"""Convert XML derived dict to tf.Example proto.
Notice that this function normalizes the bounding box coordinates provided
by the raw data.
Args:
data: dict holding PASCAL XML fields for a single image (obtained by
running dataset_util.recursive_parse_xml_to_dict)
dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integer ids.
ignore_difficult_instances: Whether to skip difficult instances in the
dataset (default: False).
image_subdirectory: String specifying subdirectory within the
PASCAL dataset directory holding the actual image data.
Returns:
example: The converted tf.Example.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
"""
img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
full_path = os.path.join(dataset_directory, img_path)
  with tf.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
if image.format != 'JPEG':
raise ValueError('Image format not JPEG')
key = hashlib.sha256(encoded_jpg).hexdigest()
width = int(data['size']['width'])
height = int(data['size']['height'])
xmin = []
ymin = []
xmax = []
ymax = []
classes = []
classes_text = []
truncated = []
poses = []
difficult_obj = []
for obj in data['object']:
difficult = bool(int(obj['difficult']))
if ignore_difficult_instances and difficult:
continue
difficult_obj.append(int(difficult))
xmin.append(float(obj['bndbox']['xmin']) / width)
ymin.append(float(obj['bndbox']['ymin']) / height)
xmax.append(float(obj['bndbox']['xmax']) / width)
ymax.append(float(obj['bndbox']['ymax']) / height)
classes_text.append(obj['name'])
classes.append(label_map_dict[obj['name']])
truncated.append(int(obj['truncated']))
poses.append(obj['pose'])
example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(data['filename']),
'image/source_id': dataset_util.bytes_feature(data['filename']),
'image/key/sha256': dataset_util.bytes_feature(key),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature('jpeg'),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
'image/object/truncated': dataset_util.int64_list_feature(truncated),
'image/object/view': dataset_util.bytes_list_feature(poses),
}))
return example
def main(_):
data_dir = FLAGS.data_dir
years = ['VOC2007', 'VOC2012']
if FLAGS.year != 'merged':
years = [FLAGS.year]
writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
for year in years:
logging.info('Reading from PASCAL %s dataset.', year)
examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
'aeroplane_' + FLAGS.set + '.txt')
annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
examples_list = dataset_util.read_examples_list(examples_path)
for idx, example in enumerate(examples_list):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(examples_list))
path = os.path.join(annotations_dir, example + '.xml')
with tf.gfile.GFile(path, 'r') as fid:
xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
FLAGS.ignore_difficult_instances)
writer.write(tf_example.SerializeToString())
writer.close()
if __name__ == '__main__':
tf.app.run()
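# Usage sketch (not part of the original file; paths are illustrative and the
# flag values assume the standard VOCdevkit layout read by main() above):
#
#   python object_detection/create_pascal_tf_record.py \
#       --data_dir=/home/user/VOCdevkit \
#       --year=VOC2012 \
#       --set=train \
#       --output_path=/home/user/pascal_train.record \
#       --label_map_path=object_detection/data/pascal_label_map.pbtxt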
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_pascal_tf_record.py."""
import os
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection import create_pascal_tf_record
class DictToTFExampleTest(tf.test.TestCase):
def _assertProtoEqual(self, proto_field, expectation):
"""Helper function to assert if a proto field equals some value.
Args:
proto_field: The protobuf field to compare.
expectation: The expected value of the protobuf field.
"""
proto_list = [p for p in proto_field]
self.assertListEqual(proto_list, expectation)
def test_dict_to_tf_example(self):
image_file_name = 'tmp_image.jpg'
    image_data = np.random.randint(256, size=(256, 256, 3)).astype(np.uint8)
save_path = os.path.join(self.get_temp_dir(), image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
data = {
'folder': '',
'filename': image_file_name,
'size': {
'height': 256,
'width': 256,
},
'object': [
{
'difficult': 1,
'bndbox': {
'xmin': 64,
'ymin': 64,
'xmax': 192,
'ymax': 192,
},
'name': 'person',
'truncated': 0,
'pose': '',
},
],
}
label_map_dict = {
'background': 0,
'person': 1,
'notperson': 2,
}
example = create_pascal_tf_record.dict_to_tf_example(
data, self.get_temp_dir(), label_map_dict, image_subdirectory='')
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[image_file_name])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[image_file_name])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value, ['jpeg'])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
['person'])
self._assertProtoEqual(
example.features.feature['image/object/class/label'].int64_list.value,
[1])
self._assertProtoEqual(
example.features.feature['image/object/difficult'].int64_list.value,
[1])
self._assertProtoEqual(
example.features.feature['image/object/truncated'].int64_list.value,
[0])
self._assertProtoEqual(
example.features.feature['image/object/view'].bytes_list.value, [''])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert the Oxford pet dataset to TFRecord for object_detection.
See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar
Cats and Dogs
IEEE Conference on Computer Vision and Pattern Recognition, 2012
http://www.robots.ox.ac.uk/~vgg/data/pets/
Example usage:
./create_pet_tf_record --data_dir=/home/user/pet \
--output_dir=/home/user/pet/output
"""
import hashlib
import io
import logging
import os
import random
import re
from lxml import etree
import PIL.Image
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.')
flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt',
'Path to label map proto')
FLAGS = flags.FLAGS
def get_class_name_from_filename(file_name):
"""Gets the class name from a file.
Args:
file_name: The file name to get the class name from.
ie. "american_pit_bull_terrier_105.jpg"
Returns:
example: The converted tf.Example.
"""
match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
return match.groups()[0]
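# Quick sanity check of the parsing above (illustrative, doctest-style):
#
#   >>> get_class_name_from_filename('american_pit_bull_terrier_105.jpg')
#   'american_pit_bull_terrier'
#   >>> get_class_name_from_filename('Abyssinian_12.jpg')
#   'Abyssinian'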
def dict_to_tf_example(data,
label_map_dict,
image_subdirectory,
ignore_difficult_instances=False):
"""Convert XML derived dict to tf.Example proto.
Notice that this function normalizes the bounding box coordinates provided
by the raw data.
Args:
data: dict holding PASCAL XML fields for a single image (obtained by
running dataset_util.recursive_parse_xml_to_dict)
    label_map_dict: A map from string label names to integer ids.
    image_subdirectory: String specifying subdirectory within the
      pet dataset directory holding the actual image data.
ignore_difficult_instances: Whether to skip difficult instances in the
dataset (default: False).
Returns:
example: The converted tf.Example.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
"""
img_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(img_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
if image.format != 'JPEG':
raise ValueError('Image format not JPEG')
key = hashlib.sha256(encoded_jpg).hexdigest()
width = int(data['size']['width'])
height = int(data['size']['height'])
xmin = []
ymin = []
xmax = []
ymax = []
classes = []
classes_text = []
truncated = []
poses = []
difficult_obj = []
for obj in data['object']:
difficult = bool(int(obj['difficult']))
if ignore_difficult_instances and difficult:
continue
difficult_obj.append(int(difficult))
xmin.append(float(obj['bndbox']['xmin']) / width)
ymin.append(float(obj['bndbox']['ymin']) / height)
xmax.append(float(obj['bndbox']['xmax']) / width)
ymax.append(float(obj['bndbox']['ymax']) / height)
class_name = get_class_name_from_filename(data['filename'])
classes_text.append(class_name)
classes.append(label_map_dict[class_name])
truncated.append(int(obj['truncated']))
poses.append(obj['pose'])
example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(data['filename']),
'image/source_id': dataset_util.bytes_feature(data['filename']),
'image/key/sha256': dataset_util.bytes_feature(key),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature('jpeg'),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
'image/object/truncated': dataset_util.int64_list_feature(truncated),
'image/object/view': dataset_util.bytes_list_feature(poses),
}))
return example
def create_tf_record(output_filename,
label_map_dict,
annotations_dir,
image_dir,
examples):
"""Creates a TFRecord file from examples.
Args:
output_filename: Path to where output file is saved.
label_map_dict: The label map dictionary.
annotations_dir: Directory where annotation files are stored.
image_dir: Directory where image files are stored.
examples: Examples to parse and save to tf record.
"""
writer = tf.python_io.TFRecordWriter(output_filename)
for idx, example in enumerate(examples):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(examples))
path = os.path.join(annotations_dir, 'xmls', example + '.xml')
if not os.path.exists(path):
logging.warning('Could not find %s, ignoring example.', path)
continue
with tf.gfile.GFile(path, 'r') as fid:
xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
writer.write(tf_example.SerializeToString())
writer.close()
# TODO: Add test for pet/PASCAL main files.
def main(_):
data_dir = FLAGS.data_dir
label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
logging.info('Reading from Pet dataset.')
image_dir = os.path.join(data_dir, 'images')
annotations_dir = os.path.join(data_dir, 'annotations')
examples_path = os.path.join(annotations_dir, 'trainval.txt')
examples_list = dataset_util.read_examples_list(examples_path)
# Test images are not included in the downloaded data set, so we shall perform
# our own split.
random.seed(42)
random.shuffle(examples_list)
num_examples = len(examples_list)
num_train = int(0.7 * num_examples)
train_examples = examples_list[:num_train]
val_examples = examples_list[num_train:]
logging.info('%d training and %d validation examples.',
len(train_examples), len(val_examples))
train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
create_tf_record(train_output_path, label_map_dict, annotations_dir,
image_dir, train_examples)
create_tf_record(val_output_path, label_map_dict, annotations_dir,
image_dir, val_examples)
if __name__ == '__main__':
tf.app.run()
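# Usage sketch (not part of the original file; paths are illustrative). In
# addition to the flags shown in the module docstring, the label map location
# can be overridden:
#
#   python object_detection/create_pet_tf_record.py \
#       --data_dir=/home/user/pet \
#       --output_dir=/home/user/pet/output \
#       --label_map_path=object_detection/data/pet_label_map.pbtxt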
item {
name: "/m/01g317"
id: 1
display_name: "person"
}
item {
name: "/m/0199g"
id: 2
display_name: "bicycle"
}
item {
name: "/m/0k4j"
id: 3
display_name: "car"
}
item {
name: "/m/04_sv"
id: 4
display_name: "motorcycle"
}
item {
name: "/m/05czz6l"
id: 5
display_name: "airplane"
}
item {
name: "/m/01bjv"
id: 6
display_name: "bus"
}
item {
name: "/m/07jdr"
id: 7
display_name: "train"
}
item {
name: "/m/07r04"
id: 8
display_name: "truck"
}
item {
name: "/m/019jd"
id: 9
display_name: "boat"
}
item {
name: "/m/015qff"
id: 10
display_name: "traffic light"
}
item {
name: "/m/01pns0"
id: 11
display_name: "fire hydrant"
}
item {
name: "/m/02pv19"
id: 13
display_name: "stop sign"
}
item {
name: "/m/015qbp"
id: 14
display_name: "parking meter"
}
item {
name: "/m/0cvnqh"
id: 15
display_name: "bench"
}
item {
name: "/m/015p6"
id: 16
display_name: "bird"
}
item {
name: "/m/01yrx"
id: 17
display_name: "cat"
}
item {
name: "/m/0bt9lr"
id: 18
display_name: "dog"
}
item {
name: "/m/03k3r"
id: 19
display_name: "horse"
}
item {
name: "/m/07bgp"
id: 20
display_name: "sheep"
}
item {
name: "/m/01xq0k1"
id: 21
display_name: "cow"
}
item {
name: "/m/0bwd_0j"
id: 22
display_name: "elephant"
}
item {
name: "/m/01dws"
id: 23
display_name: "bear"
}
item {
name: "/m/0898b"
id: 24
display_name: "zebra"
}
item {
name: "/m/03bk1"
id: 25
display_name: "giraffe"
}
item {
name: "/m/01940j"
id: 27
display_name: "backpack"
}
item {
name: "/m/0hnnb"
id: 28
display_name: "umbrella"
}
item {
name: "/m/080hkjn"
id: 31
display_name: "handbag"
}
item {
name: "/m/01rkbr"
id: 32
display_name: "tie"
}
item {
name: "/m/01s55n"
id: 33
display_name: "suitcase"
}
item {
name: "/m/02wmf"
id: 34
display_name: "frisbee"
}
item {
name: "/m/071p9"
id: 35
display_name: "skis"
}
item {
name: "/m/06__v"
id: 36
display_name: "snowboard"
}
item {
name: "/m/018xm"
id: 37
display_name: "sports ball"
}
item {
name: "/m/02zt3"
id: 38
display_name: "kite"
}
item {
name: "/m/03g8mr"
id: 39
display_name: "baseball bat"
}
item {
name: "/m/03grzl"
id: 40
display_name: "baseball glove"
}
item {
name: "/m/06_fw"
id: 41
display_name: "skateboard"
}
item {
name: "/m/019w40"
id: 42
display_name: "surfboard"
}
item {
name: "/m/0dv9c"
id: 43
display_name: "tennis racket"
}
item {
name: "/m/04dr76w"
id: 44
display_name: "bottle"
}
item {
name: "/m/09tvcd"
id: 46
display_name: "wine glass"
}
item {
name: "/m/08gqpm"
id: 47
display_name: "cup"
}
item {
name: "/m/0dt3t"
id: 48
display_name: "fork"
}
item {
name: "/m/04ctx"
id: 49
display_name: "knife"
}
item {
name: "/m/0cmx8"
id: 50
display_name: "spoon"
}
item {
name: "/m/04kkgm"
id: 51
display_name: "bowl"
}
item {
name: "/m/09qck"
id: 52
display_name: "banana"
}
item {
name: "/m/014j1m"
id: 53
display_name: "apple"
}
item {
name: "/m/0l515"
id: 54
display_name: "sandwich"
}
item {
name: "/m/0cyhj_"
id: 55
display_name: "orange"
}
item {
name: "/m/0hkxq"
id: 56
display_name: "broccoli"
}
item {
name: "/m/0fj52s"
id: 57
display_name: "carrot"
}
item {
name: "/m/01b9xk"
id: 58
display_name: "hot dog"
}
item {
name: "/m/0663v"
id: 59
display_name: "pizza"
}
item {
name: "/m/0jy4k"
id: 60
display_name: "donut"
}
item {
name: "/m/0fszt"
id: 61
display_name: "cake"
}
item {
name: "/m/01mzpv"
id: 62
display_name: "chair"
}
item {
name: "/m/02crq1"
id: 63
display_name: "couch"
}
item {
name: "/m/03fp41"
id: 64
display_name: "potted plant"
}
item {
name: "/m/03ssj5"
id: 65
display_name: "bed"
}
item {
name: "/m/04bcr3"
id: 67
display_name: "dining table"
}
item {
name: "/m/09g1w"
id: 70
display_name: "toilet"
}
item {
name: "/m/07c52"
id: 72
display_name: "tv"
}
item {
name: "/m/01c648"
id: 73
display_name: "laptop"
}
item {
name: "/m/020lf"
id: 74
display_name: "mouse"
}
item {
name: "/m/0qjjc"
id: 75
display_name: "remote"
}
item {
name: "/m/01m2v"
id: 76
display_name: "keyboard"
}
item {
name: "/m/050k8"
id: 77
display_name: "cell phone"
}
item {
name: "/m/0fx9l"
id: 78
display_name: "microwave"
}
item {
name: "/m/029bxz"
id: 79
display_name: "oven"
}
item {
name: "/m/01k6s3"
id: 80
display_name: "toaster"
}
item {
name: "/m/0130jx"
id: 81
display_name: "sink"
}
item {
name: "/m/040b_t"
id: 82
display_name: "refrigerator"
}
item {
name: "/m/0bt_c3"
id: 84
display_name: "book"
}
item {
name: "/m/01x3z"
id: 85
display_name: "clock"
}
item {
name: "/m/02s195"
id: 86
display_name: "vase"
}
item {
name: "/m/01lsmm"
id: 87
display_name: "scissors"
}
item {
name: "/m/0kmg4"
id: 88
display_name: "teddy bear"
}
item {
name: "/m/03wvsk"
id: 89
display_name: "hair drier"
}
item {
name: "/m/012xff"
id: 90
display_name: "toothbrush"
}
item {
id: 0
name: 'none_of_the_above'
}
item {
id: 1
name: 'aeroplane'
}
item {
id: 2
name: 'bicycle'
}
item {
id: 3
name: 'bird'
}
item {
id: 4
name: 'boat'
}
item {
id: 5
name: 'bottle'
}
item {
id: 6
name: 'bus'
}
item {
id: 7
name: 'car'
}
item {
id: 8
name: 'cat'
}
item {
id: 9
name: 'chair'
}
item {
id: 10
name: 'cow'
}
item {
id: 11
name: 'diningtable'
}
item {
id: 12
name: 'dog'
}
item {
id: 13
name: 'horse'
}
item {
id: 14
name: 'motorbike'
}
item {
id: 15
name: 'person'
}
item {
id: 16
name: 'pottedplant'
}
item {
id: 17
name: 'sheep'
}
item {
id: 18
name: 'sofa'
}
item {
id: 19
name: 'train'
}
item {
id: 20
name: 'tvmonitor'
}
item {
id: 0
name: 'none_of_the_above'
}
item {
id: 1
name: 'Abyssinian'
}
item {
id: 2
name: 'american_bulldog'
}
item {
id: 3
name: 'american_pit_bull_terrier'
}
item {
id: 4
name: 'basset_hound'
}
item {
id: 5
name: 'beagle'
}
item {
id: 6
name: 'Bengal'
}
item {
id: 7
name: 'Birman'
}
item {
id: 8
name: 'Bombay'
}
item {
id: 9
name: 'boxer'
}
item {
id: 10
name: 'British_Shorthair'
}
item {
id: 11
name: 'chihuahua'
}
item {
id: 12
name: 'Egyptian_Mau'
}
item {
id: 13
name: 'english_cocker_spaniel'
}
item {
id: 14
name: 'english_setter'
}
item {
id: 15
name: 'german_shorthaired'
}
item {
id: 16
name: 'great_pyrenees'
}
item {
id: 17
name: 'havanese'
}
item {
id: 18
name: 'japanese_chin'
}
item {
id: 19
name: 'keeshond'
}
item {
id: 20
name: 'leonberger'
}
item {
id: 21
name: 'Maine_Coon'
}
item {
id: 22
name: 'miniature_pinscher'
}
item {
id: 23
name: 'newfoundland'
}
item {
id: 24
name: 'Persian'
}
item {
id: 25
name: 'pomeranian'
}
item {
id: 26
name: 'pug'
}
item {
id: 27
name: 'Ragdoll'
}
item {
id: 28
name: 'Russian_Blue'
}
item {
id: 29
name: 'saint_bernard'
}
item {
id: 30
name: 'samoyed'
}
item {
id: 31
name: 'scottish_terrier'
}
item {
id: 32
name: 'shiba_inu'
}
item {
id: 33
name: 'Siamese'
}
item {
id: 34
name: 'Sphynx'
}
item {
id: 35
name: 'staffordshire_bull_terrier'
}
item {
id: 36
name: 'wheaten_terrier'
}
item {
id: 37
name: 'yorkshire_terrier'
}
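# The converters above read these label maps through label_map_util, e.g.
# (sketch; the path assumes the repository's data/ directory):
#
#   label_map_dict = label_map_util.get_label_map_dict(
#       'object_detection/data/pet_label_map.pbtxt')
#   label_map_dict['Abyssinian']  # -> 1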
# Tensorflow Object Detection API: data decoders.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "tf_example_decoder",
srcs = ["tf_example_decoder.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:data_decoder",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_test(
name = "tf_example_decoder_test",
srcs = ["tf_example_decoder_test.py"],
deps = [
":tf_example_decoder",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
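# The library and its test can be exercised through the targets above, e.g.
# (sketch; assumes a bazel workspace that maps this tree to tensorflow_models):
#
#   bazel test //tensorflow_models/object_detection/data_decoders:tf_example_decoder_test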
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow Example proto decoder for object detection.
A decoder to decode string tensors containing serialized tensorflow.Example
protos for object detection.
"""
import tensorflow as tf
from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
slim_example_decoder = tf.contrib.slim.tfexample_decoder
class TfExampleDecoder(data_decoder.DataDecoder):
"""Tensorflow Example proto decoder."""
def __init__(self):
"""Constructor sets keys_to_features and items_to_handlers."""
self.keys_to_features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
'image/key/sha256': tf.FixedLenFeature((), tf.string, default_value=''),
'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''),
'image/height': tf.FixedLenFeature((), tf.int64, 1),
'image/width': tf.FixedLenFeature((), tf.int64, 1),
# Object boxes and classes.
'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
'image/object/class/label': tf.VarLenFeature(tf.int64),
'image/object/area': tf.VarLenFeature(tf.float32),
'image/object/is_crowd': tf.VarLenFeature(tf.int64),
'image/object/difficult': tf.VarLenFeature(tf.int64),
# Instance masks and classes.
'image/segmentation/object': tf.VarLenFeature(tf.int64),
'image/segmentation/object/class': tf.VarLenFeature(tf.int64)
}
self.items_to_handlers = {
fields.InputDataFields.image: slim_example_decoder.Image(
image_key='image/encoded', format_key='image/format', channels=3),
fields.InputDataFields.source_id: (
slim_example_decoder.Tensor('image/source_id')),
fields.InputDataFields.key: (
slim_example_decoder.Tensor('image/key/sha256')),
fields.InputDataFields.filename: (
slim_example_decoder.Tensor('image/filename')),
# Object boxes and classes.
fields.InputDataFields.groundtruth_boxes: (
slim_example_decoder.BoundingBox(
['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
fields.InputDataFields.groundtruth_classes: (
slim_example_decoder.Tensor('image/object/class/label')),
fields.InputDataFields.groundtruth_area: slim_example_decoder.Tensor(
'image/object/area'),
fields.InputDataFields.groundtruth_is_crowd: (
slim_example_decoder.Tensor('image/object/is_crowd')),
fields.InputDataFields.groundtruth_difficult: (
slim_example_decoder.Tensor('image/object/difficult')),
# Instance masks and classes.
fields.InputDataFields.groundtruth_instance_masks: (
slim_example_decoder.ItemHandlerCallback(
['image/segmentation/object', 'image/height', 'image/width'],
self._reshape_instance_masks)),
fields.InputDataFields.groundtruth_instance_classes: (
slim_example_decoder.Tensor('image/segmentation/object/class')),
}
def Decode(self, tf_example_string_tensor):
"""Decodes serialized tensorflow example and returns a tensor dictionary.
Args:
tf_example_string_tensor: a string tensor holding a serialized tensorflow
example proto.
Returns:
A dictionary of the following tensors.
fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
containing image.
fields.InputDataFields.source_id - string tensor containing original
image id.
fields.InputDataFields.key - string tensor with unique sha256 hash key.
fields.InputDataFields.filename - string tensor with original dataset
filename.
fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
[None, 4] containing box corners.
fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
[None] containing classes for the boxes.
      fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
        [None] containing object mask areas, measured in squared pixels.
fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
[None] indicating if the boxes enclose a crowd.
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
[None] indicating if the boxes represent `difficult` instances.
fields.InputDataFields.groundtruth_instance_masks - 3D int64 tensor of
shape [None, None, None] containing instance masks.
fields.InputDataFields.groundtruth_instance_classes - 1D int64 tensor
of shape [None] containing classes for the instance masks.
"""
serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
self.items_to_handlers)
keys = decoder.list_items()
tensors = decoder.decode(serialized_example, items=keys)
tensor_dict = dict(zip(keys, tensors))
is_crowd = fields.InputDataFields.groundtruth_is_crowd
tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
return tensor_dict
def _reshape_instance_masks(self, keys_to_tensors):
"""Reshape instance segmentation masks.
The instance segmentation masks are reshaped to [num_instances, height,
width] and cast to boolean type to save memory.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D boolean tensor of shape [num_instances, height, width].
"""
masks = keys_to_tensors['image/segmentation/object']
if isinstance(masks, tf.SparseTensor):
masks = tf.sparse_tensor_to_dense(masks)
height = keys_to_tensors['image/height']
width = keys_to_tensors['image/width']
to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32)
return tf.cast(tf.reshape(masks, to_shape), tf.bool)
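# Minimal usage sketch for the decoder above (not part of the original file;
# 'pet_train.record' is an illustrative TFRecord produced by the converters):
def _decode_record_sketch():
  decoder = TfExampleDecoder()
  serialized = tf.placeholder(tf.string)
  tensor_dict = decoder.Decode(serialized)
  with tf.Session() as sess:
    for record in tf.python_io.tf_record_iterator('pet_train.record'):
      # Each record is decoded into the fields.InputDataFields tensors.
      outputs = sess.run(tensor_dict, feed_dict={serialized: record})
      print(outputs[fields.InputDataFields.image].shape)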
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.data_decoders.tf_example_decoder."""
import numpy as np
import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
class TfExampleDecoderTest(tf.test.TestCase):
def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
with self.test_session():
if encoding_type == 'jpeg':
image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
elif encoding_type == 'png':
image_encoded = tf.image.encode_png(tf.constant(image_tensor)).eval()
else:
raise ValueError('Invalid encoding type.')
return image_encoded
def _DecodeImage(self, image_encoded, encoding_type='jpeg'):
with self.test_session():
if encoding_type == 'jpeg':
image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval()
elif encoding_type == 'png':
image_decoded = tf.image.decode_png(tf.constant(image_encoded)).eval()
else:
raise ValueError('Invalid encoding type.')
return image_decoded
def _Int64Feature(self, value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def _FloatFeature(self, value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def _BytesFeature(self, value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def testDecodeJpegImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
decoded_jpeg = self._DecodeImage(encoded_jpeg)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/source_id': self._BytesFeature('image_id'),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeImageKeyAndFilename(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/key/sha256': self._BytesFeature('abc'),
'image/filename': self._BytesFeature('filename')
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertEqual('abc', tensor_dict[fields.InputDataFields.key])
self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
def testDecodePngImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_png),
'image/format': self._BytesFeature('png'),
'image/source_id': self._BytesFeature('image_id')
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeBoundingBox(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_ymins = [0.0, 4.0]
bbox_xmins = [1.0, 5.0]
bbox_ymaxs = [2.0, 6.0]
bbox_xmaxs = [3.0, 7.0]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
get_shape().as_list()), [None, 4])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
bbox_ymaxs, bbox_xmaxs]).transpose()
self.assertAllEqual(expected_boxes,
tensor_dict[fields.InputDataFields.groundtruth_boxes])
def testDecodeObjectLabel(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/class/label': self._Int64Feature(bbox_classes),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(bbox_classes,
tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectArea(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_area = [100., 174.]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/area': self._FloatFeature(object_area),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
get_shape().as_list()), [None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(object_area,
tensor_dict[fields.InputDataFields.groundtruth_area])
def testDecodeObjectIsCrowd(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_is_crowd = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/is_crowd': self._Int64Feature(object_is_crowd),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([bool(item) for item in object_is_crowd],
tensor_dict[
fields.InputDataFields.groundtruth_is_crowd])
def testDecodeObjectDifficult(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_difficult = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/difficult': self._Int64Feature(object_difficult),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([bool(item) for item in object_difficult],
tensor_dict[
fields.InputDataFields.groundtruth_difficult])
def testDecodeInstanceSegmentation(self):
num_instances = 4
image_height = 5
image_width = 3
# Randomly generate image.
image_tensor = np.random.randint(255, size=(image_height,
image_width,
3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
# Randomly generate instance segmentation masks.
instance_segmentation = (
np.random.randint(2, size=(num_instances,
image_height,
image_width)).astype(np.int64))
# Randomly generate class labels for each instance.
instance_segmentation_classes = np.random.randint(
100, size=(num_instances)).astype(np.int64)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/height': self._Int64Feature([image_height]),
'image/width': self._Int64Feature([image_width]),
'image/segmentation/object': self._Int64Feature(
instance_segmentation.flatten()),
'image/segmentation/object/class': self._Int64Feature(
instance_segmentation_classes)})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
get_shape().as_list()), [None, None, None])
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_instance_classes].
get_shape().as_list()), [None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
instance_segmentation.astype(np.bool),
tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
self.assertAllEqual(
instance_segmentation_classes,
tensor_dict[fields.InputDataFields.groundtruth_instance_classes])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Evaluation executable for detection models.
This executable is used to evaluate DetectionModels. There are two ways of
configuring the eval job.
1) A single pipeline_pb2.TrainEvalPipelineConfig file may be specified.
In this mode, the --eval_training_data flag may be given to force the pipeline
to evaluate on training data instead.
Example usage:
./eval \
--logtostderr \
--checkpoint_dir=path/to/checkpoint_dir \
--eval_dir=path/to/eval_dir \
--pipeline_config_path=pipeline_config.pbtxt
2) Three configuration files may be provided: a model_pb2.DetectionModel
configuration file to define what type of DetectionModel is being evaluated, an
input_reader_pb2.InputReader file to specify what data the model is evaluating
and an eval_pb2.EvalConfig file to configure evaluation parameters.
Example usage:
./eval \
--logtostderr \
--checkpoint_dir=path/to/checkpoint_dir \
--eval_dir=path/to/eval_dir \
--eval_config_path=eval_config.pbtxt \
--model_config_path=model_config.pbtxt \
--input_config_path=eval_input_config.pbtxt
"""
import functools
import tensorflow as tf
from google.protobuf import text_format
from object_detection import evaluator
from object_detection.builders import input_reader_builder
from object_detection.builders import model_builder
from object_detection.protos import eval_pb2
from object_detection.protos import input_reader_pb2
from object_detection.protos import model_pb2
from object_detection.protos import pipeline_pb2
from object_detection.utils import label_map_util
tf.logging.set_verbosity(tf.logging.INFO)
flags = tf.app.flags
flags.DEFINE_boolean('eval_training_data', False,
                     'Whether to evaluate on training data for this job.')
flags.DEFINE_string('checkpoint_dir', '',
'Directory containing checkpoints to evaluate, typically '
'set to `train_dir` used in the training job.')
flags.DEFINE_string('eval_dir', '',
'Directory to write eval summaries to.')
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file. If provided, other configs are ignored')
flags.DEFINE_string('eval_config_path', '',
'Path to an eval_pb2.EvalConfig config file.')
flags.DEFINE_string('input_config_path', '',
'Path to an input_reader_pb2.InputReader config file.')
flags.DEFINE_string('model_config_path', '',
'Path to a model_pb2.DetectionModel config file.')
FLAGS = flags.FLAGS
def get_configs_from_pipeline_file():
"""Reads evaluation configuration from a pipeline_pb2.TrainEvalPipelineConfig.
  Reads the evaluation config from the file specified by the
  pipeline_config_path flag.
Returns:
model_config: a model_pb2.DetectionModel
    eval_config: an eval_pb2.EvalConfig
    input_config: an input_reader_pb2.InputReader
"""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
model_config = pipeline_config.model
  eval_config = pipeline_config.eval_config
  if FLAGS.eval_training_data:
    input_config = pipeline_config.train_input_reader
  else:
    input_config = pipeline_config.eval_input_reader
return model_config, eval_config, input_config
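# For reference, a TrainEvalPipelineConfig text proto consumed by the function
# above has this shape (sketch; field names follow the attribute accesses in
# the code, message contents are illustrative):
#
#   model { ... }
#   train_config { ... }
#   train_input_reader { ... }
#   eval_config { ... }
#   eval_input_reader { label_map_path: "data/pascal_label_map.pbtxt" ... }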
def get_configs_from_multiple_files():
"""Reads evaluation configuration from multiple config files.
Reads the evaluation config from the following files:
model_config: Read from --model_config_path
eval_config: Read from --eval_config_path
input_config: Read from --input_config_path
Returns:
model_config: a model_pb2.DetectionModel
    eval_config: an eval_pb2.EvalConfig
    input_config: an input_reader_pb2.InputReader
"""
eval_config = eval_pb2.EvalConfig()
with tf.gfile.GFile(FLAGS.eval_config_path, 'r') as f:
text_format.Merge(f.read(), eval_config)
model_config = model_pb2.DetectionModel()
with tf.gfile.GFile(FLAGS.model_config_path, 'r') as f:
text_format.Merge(f.read(), model_config)
input_config = input_reader_pb2.InputReader()
with tf.gfile.GFile(FLAGS.input_config_path, 'r') as f:
text_format.Merge(f.read(), input_config)
return model_config, eval_config, input_config
def main(unused_argv):
assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
assert FLAGS.eval_dir, '`eval_dir` is missing.'
if FLAGS.pipeline_config_path:
model_config, eval_config, input_config = get_configs_from_pipeline_file()
else:
model_config, eval_config, input_config = get_configs_from_multiple_files()
model_fn = functools.partial(
model_builder.build,
model_config=model_config,
is_training=False)
create_input_dict_fn = functools.partial(
input_reader_builder.build,
input_config)
label_map = label_map_util.load_labelmap(input_config.label_map_path)
max_num_classes = max([item.id for item in label_map.item])
categories = label_map_util.convert_label_map_to_categories(
label_map, max_num_classes)
evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
FLAGS.checkpoint_dir, FLAGS.eval_dir)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common functions for repeatedly evaluating a checkpoint.
"""
import copy
import logging
import os
import time
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import object_detection_evaluation
from object_detection.utils import visualization_utils as vis_utils
slim = tf.contrib.slim
def write_metrics(metrics, global_step, summary_dir):
"""Write metrics to a summary directory.
Args:
metrics: A dictionary containing metric names and values.
global_step: Global step at which the metrics are computed.
summary_dir: Directory to write tensorflow summaries to.
"""
logging.info('Writing metrics to tf summary.')
summary_writer = tf.summary.FileWriter(summary_dir)
for key in sorted(metrics):
summary = tf.Summary(value=[
tf.Summary.Value(tag=key, simple_value=metrics[key]),
])
summary_writer.add_summary(summary, global_step)
logging.info('%s: %f', key, metrics[key])
summary_writer.close()
logging.info('Metrics written to tf summary.')
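# Illustrative call (metric name, value, step and directory are made up):
#
#   write_metrics({'Precision/mAP@0.5IOU': 0.73}, global_step=20000,
#                 summary_dir='/tmp/eval')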
def evaluate_detection_results_pascal_voc(result_lists,
categories,
label_id_offset=0,
iou_thres=0.5,
corloc_summary=False):
"""Computes Pascal VOC detection metrics given groundtruth and detections.
  This function computes Pascal VOC metrics given detections and groundtruth
  boxes encoded in result_lists, and returns them as a dictionary of scalar
  values that can be handed to write_metrics() above for display on
  tensorboard.
Args:
result_lists: a dictionary holding lists of groundtruth and detection
data corresponding to each image being evaluated. The following keys
are required:
'image_id': a list of string ids
'detection_boxes': a list of float32 numpy arrays of shape [N, 4]
'detection_scores': a list of float32 numpy arrays of shape [N]
'detection_classes': a list of int32 numpy arrays of shape [N]
'groundtruth_boxes': a list of float32 numpy arrays of shape [M, 4]
'groundtruth_classes': a list of int32 numpy arrays of shape [M]
and the remaining fields below are optional:
'difficult': a list of boolean arrays of shape [M] indicating the
difficulty of groundtruth boxes. Some datasets like PASCAL VOC provide
this information and it is used to remove difficult examples from eval
in order to not penalize the models on them.
Note that it is okay to have additional fields in result_lists --- they
are simply ignored.
categories: a list of dictionaries representing all possible categories.
Each dict in this list has the following keys:
'id': (required) an integer id uniquely identifying this category
'name': (required) string representing category name
e.g., 'cat', 'dog', 'pizza'
label_id_offset: an integer offset for the label space.
iou_thres: float determining the IoU threshold at which a box is considered
correct. Defaults to the standard 0.5.
corloc_summary: boolean. If True, also outputs CorLoc metrics.
Returns:
A dictionary of metric names to scalar values.
Raises:
ValueError: if the set of keys in result_lists is not a superset of the
expected list of keys. Unexpected keys are ignored.
ValueError: if the lists in result_lists have inconsistent sizes.
"""
# check for expected keys in result_lists
expected_keys = [
'detection_boxes', 'detection_scores', 'detection_classes', 'image_id'
]
expected_keys += ['groundtruth_boxes', 'groundtruth_classes']
if not set(expected_keys).issubset(set(result_lists.keys())):
raise ValueError('result_lists does not have expected key set.')
num_results = len(result_lists[expected_keys[0]])
for key in expected_keys:
if len(result_lists[key]) != num_results:
raise ValueError('Inconsistent list sizes in result_lists')
# Pascal VOC evaluator assumes foreground index starts from zero.
categories = copy.deepcopy(categories)
for idx in range(len(categories)):
categories[idx]['id'] -= label_id_offset
  # Infer num_classes from the largest category id.
num_classes = max([cat['id'] for cat in categories]) + 1
logging.info('Computing Pascal VOC metrics on results.')
if all(image_id.isdigit() for image_id in result_lists['image_id']):
image_ids = [int(image_id) for image_id in result_lists['image_id']]
else:
image_ids = range(num_results)
evaluator = object_detection_evaluation.ObjectDetectionEvaluation(
num_classes, matching_iou_threshold=iou_thres)
difficult_lists = None
if 'difficult' in result_lists and result_lists['difficult']:
difficult_lists = result_lists['difficult']
for idx, image_id in enumerate(image_ids):
difficult = None
if difficult_lists is not None and difficult_lists[idx].size:
difficult = difficult_lists[idx].astype(np.bool)
evaluator.add_single_ground_truth_image_info(
image_id, result_lists['groundtruth_boxes'][idx],
result_lists['groundtruth_classes'][idx] - label_id_offset,
difficult)
evaluator.add_single_detected_image_info(
image_id, result_lists['detection_boxes'][idx],
result_lists['detection_scores'][idx],
result_lists['detection_classes'][idx] - label_id_offset)
per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc = (
evaluator.evaluate())
metrics = {'Precision/mAP@{}IOU'.format(iou_thres): mean_ap}
category_index = label_map_util.create_category_index(categories)
for idx in range(per_class_ap.size):
if idx in category_index:
display_name = ('PerformanceByCategory/mAP@{}IOU/{}'
.format(iou_thres, category_index[idx]['name']))
metrics[display_name] = per_class_ap[idx]
if corloc_summary:
metrics['CorLoc/CorLoc@{}IOU'.format(iou_thres)] = mean_corloc
for idx in range(per_class_corloc.size):
if idx in category_index:
display_name = (
'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
iou_thres, category_index[idx]['name']))
metrics[display_name] = per_class_corloc[idx]
return metrics
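# Illustrative only (not part of the original file): the smallest well-formed
# input for the function above. Shapes follow the docstring; the box values
# and the single 'person' category are made up.
def _pascal_metrics_sketch():
  result_lists = {
      'image_id': ['0'],
      'detection_boxes': [np.array([[0.1, 0.1, 0.5, 0.5]], dtype=np.float32)],
      'detection_scores': [np.array([0.9], dtype=np.float32)],
      'detection_classes': [np.array([1], dtype=np.int32)],
      'groundtruth_boxes': [np.array([[0.1, 0.1, 0.5, 0.5]], dtype=np.float32)],
      'groundtruth_classes': [np.array([1], dtype=np.int32)],
  }
  categories = [{'id': 1, 'name': 'person'}]
  # Returns a dictionary of metric names to scalar values.
  return evaluate_detection_results_pascal_voc(result_lists, categories)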
# TODO: Add tests.
def visualize_detection_results(result_dict,
tag,
global_step,
categories,
summary_dir='',
export_dir='',
agnostic_mode=False,
show_groundtruth=False,
min_score_thresh=.5,
max_num_predictions=20):
"""Visualizes detection results and writes visualizations to image summaries.
This function visualizes an image with its detected bounding boxes and writes
to image summaries which can be viewed on tensorboard. It optionally also
  writes images to a directory. If an entry is missing from the label map, the
  class name shown in the visualization is "N/A".
Args:
result_dict: a dictionary holding groundtruth and detection
data corresponding to each image being evaluated. The following keys
are required:
'original_image': a numpy array representing the image with shape
[1, height, width, 3]
'detection_boxes': a numpy array of shape [N, 4]
'detection_scores': a numpy array of shape [N]
'detection_classes': a numpy array of shape [N]
The following keys are optional:
'groundtruth_boxes': a numpy array of shape [N, 4]
'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
      Detections are assumed to be provided in decreasing order of score, and
      scores are assumed to be probabilities between 0 and 1.
tag: tensorboard tag (string) to associate with image.
global_step: global step at which the visualization are generated.
categories: a list of dictionaries representing all possible categories.
Each dict in this list has the following keys:
'id': (required) an integer id uniquely identifying this category
'name': (required) string representing category name
e.g., 'cat', 'dog', 'pizza'
'supercategory': (optional) string representing the supercategory
e.g., 'animal', 'vehicle', 'food', etc
summary_dir: the output directory to which the image summaries are written.
export_dir: the output directory to which images are written. If this is
empty (default), then images are not exported.
agnostic_mode: boolean (default: False) controlling whether to evaluate in
class-agnostic mode or not.
show_groundtruth: boolean (default: False) controlling whether to show
groundtruth boxes in addition to detected boxes
min_score_thresh: minimum score threshold for a box to be visualized
max_num_predictions: maximum number of detections to visualize
Raises:
ValueError: if result_dict does not contain the expected keys (i.e.,
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes')
"""
if not set([
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes'
]).issubset(set(result_dict.keys())):
raise ValueError('result_dict does not contain all expected keys.')
if show_groundtruth and 'groundtruth_boxes' not in result_dict:
raise ValueError('If show_groundtruth is enabled, result_dict must contain '
'groundtruth_boxes.')
logging.info('Creating detection visualizations.')
category_index = label_map_util.create_category_index(categories)
image = np.squeeze(result_dict['original_image'], axis=0)
detection_boxes = result_dict['detection_boxes']
detection_scores = result_dict['detection_scores']
detection_classes = np.int32((result_dict['detection_classes']))
detection_keypoints = result_dict.get('detection_keypoints', None)
detection_masks = result_dict.get('detection_masks', None)
# Plot groundtruth underneath detections
if show_groundtruth:
groundtruth_boxes = result_dict['groundtruth_boxes']
groundtruth_keypoints = result_dict.get('groundtruth_keypoints', None)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
groundtruth_boxes,
None,
None,
category_index,
keypoints=groundtruth_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=None)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
detection_boxes,
detection_classes,
detection_scores,
category_index,
instance_masks=detection_masks,
keypoints=detection_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=max_num_predictions,
min_score_thresh=min_score_thresh,
agnostic_mode=agnostic_mode)
if export_dir:
export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
vis_utils.save_image_array_as_png(image, export_path)
summary = tf.Summary(value=[
tf.Summary.Value(tag=tag, image=tf.Summary.Image(
encoded_image_string=vis_utils.encode_image_array_as_png_str(
image)))
])
summary_writer = tf.summary.FileWriter(summary_dir)
summary_writer.add_summary(summary, global_step)
summary_writer.close()
logging.info('Detection visualizations written to summary with tag %s.', tag)
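# Sketch of the smallest result_dict the function above accepts (values are
# illustrative; boxes are absolute pixel coordinates, matching the
# use_normalized_coordinates=False calls above):
#
#   result_dict = {
#       'original_image': np.zeros((1, 480, 640, 3), dtype=np.uint8),
#       'detection_boxes': np.array([[100., 100., 300., 400.]]),
#       'detection_scores': np.array([0.9]),
#       'detection_classes': np.array([1]),
#   }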
# TODO: Add tests.
# TODO: Have an argument called `aggregated_processor_tensor_keys` that contains
# a whitelist of tensors used by the `aggregated_result_processor` instead of a
# blacklist. This will prevent us from inadvertently adding any evaluated
# tensors into the `results_list` data structure that are not needed by
# `aggregated_result_processor`.
def run_checkpoint_once(tensor_dict,
update_op,
summary_dir,
aggregated_result_processor=None,
batch_processor=None,
checkpoint_dirs=None,
variables_to_restore=None,
restore_fn=None,
num_batches=1,
master='',
save_graph=False,
save_graph_dir='',
metric_names_to_values=None,
keys_to_exclude_from_results=()):
"""Evaluates both python metrics and tensorflow slim metrics.
Python metrics are processed in batch by the aggregated_result_processor,
while tensorflow slim metrics statistics are computed by running
metric_names_to_updates tensors and aggregated using metric_names_to_values
tensor.
Args:
tensor_dict: a dictionary holding tensors representing a batch of detections
and corresponding groundtruth annotations.
update_op: a tensorflow update op that will run for each batch along with
      the tensors in tensor_dict.
summary_dir: a directory to write metrics summaries.
    aggregated_result_processor: a function taking one argument:
1. result_lists: a dictionary with keys matching those in tensor_dict
and corresponding values being the list of results for each tensor
in tensor_dict. The length of each such list is num_batches.
    batch_processor: a function taking five arguments:
      1. tensor_dict: the same tensor_dict that is passed in as the first
        argument to this function.
      2. sess: a tensorflow session
      3. batch_index: an integer representing the index of the batch amongst
        all batches
      4. counters: a dictionary holding 'skipped' and 'success' counts
      5. update_op: a tensorflow update op that will run for each batch.
      and returns result_dict, a dictionary of results for that batch.
      By default, batch_processor is None, in which case each batch runs:
        (result_dict, _) = sess.run([tensor_dict, update_op])
To skip an image, it suffices to return an empty dictionary in place of
result_dict.
checkpoint_dirs: list of directories to load into an EnsembleModel. If it
has only one directory, EnsembleModel will not be used -- a DetectionModel
will be instantiated directly. Not used if restore_fn is set.
variables_to_restore: None, or a dictionary mapping variable names found in
a checkpoint to model variables. The dictionary would normally be
generated by creating a tf.train.ExponentialMovingAverage object and
calling its variables_to_restore() method. Not used if restore_fn is set.
restore_fn: None, or a function that takes a tf.Session object and correctly
restores all necessary variables from the correct checkpoint file. If
None, attempts to restore from the first directory in checkpoint_dirs.
num_batches: the number of batches to use for evaluation.
master: the location of the Tensorflow session.
save_graph: whether or not the Tensorflow graph is stored as a pbtxt file.
save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
is True this must be non-empty.
metric_names_to_values: A dictionary containing metric names to tensors
which will be evaluated after processing all batches
of [tensor_dict, update_op]. If any metrics depend on statistics computed
during each batch ensure that `update_op` tensor has a control dependency
on the update ops that compute the statistics.
keys_to_exclude_from_results: keys in tensor_dict that will be excluded
from results_list. Note that the tensors corresponding to these keys will
still be evaluated for each batch, but won't be added to results_list.
Raises:
ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least
one element.
ValueError: if save_graph is True and save_graph_dir is not defined.
"""
if save_graph and not save_graph_dir:
raise ValueError('`save_graph_dir` must be defined.')
sess = tf.Session(master, graph=tf.get_default_graph())
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
if restore_fn:
restore_fn(sess)
else:
if not checkpoint_dirs:
raise ValueError('`checkpoint_dirs` must have at least one entry.')
checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0])
saver = tf.train.Saver(variables_to_restore)
saver.restore(sess, checkpoint_file)
if save_graph:
tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt')
valid_keys = list(set(tensor_dict.keys()) - set(keys_to_exclude_from_results))
result_lists = {key: [] for key in valid_keys}
counters = {'skipped': 0, 'success': 0}
other_metrics = None
with tf.contrib.slim.queues.QueueRunners(sess):
try:
for batch in range(int(num_batches)):
if (batch + 1) % 100 == 0:
logging.info('Running eval ops batch %d/%d', batch + 1, num_batches)
if not batch_processor:
try:
(result_dict, _) = sess.run([tensor_dict, update_op])
counters['success'] += 1
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
counters['skipped'] += 1
result_dict = {}
else:
result_dict = batch_processor(
tensor_dict, sess, batch, counters, update_op)
for key in result_dict:
if key in valid_keys:
result_lists[key].append(result_dict[key])
if metric_names_to_values is not None:
other_metrics = sess.run(metric_names_to_values)
logging.info('Running eval batches done.')
except tf.errors.OutOfRangeError:
logging.info('Done evaluating -- epoch limit reached')
finally:
      # All batches processed (or the queue ran dry); aggregate the results.
metrics = aggregated_result_processor(result_lists)
if other_metrics is not None:
metrics.update(other_metrics)
global_step = tf.train.global_step(sess, slim.get_global_step())
write_metrics(metrics, global_step, summary_dir)
logging.info('# success: %d', counters['success'])
logging.info('# skipped: %d', counters['skipped'])
sess.close()
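# A restore_fn of the kind the docstring above describes might look like this
# sketch (assumes the eval graph has already been built; train_dir is
# illustrative):
def _make_restore_fn(train_dir):
  saver = tf.train.Saver()
  def _restore_fn(sess):
    saver.restore(sess, tf.train.latest_checkpoint(train_dir))
  return _restore_fn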
# TODO: Add tests.
def repeated_checkpoint_run(tensor_dict,
update_op,
summary_dir,
aggregated_result_processor=None,
batch_processor=None,
checkpoint_dirs=None,
variables_to_restore=None,
restore_fn=None,
num_batches=1,
eval_interval_secs=120,
max_number_of_evaluations=None,
master='',
save_graph=False,
save_graph_dir='',
metric_names_to_values=None,
keys_to_exclude_from_results=()):
"""Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.
  This function repeatedly loads a checkpoint and evaluates a desired
  set of tensors (provided by tensor_dict) and hands the resulting numpy
  arrays to aggregated_result_processor, which can be used to further
  process/save/visualize the results.
Args:
tensor_dict: a dictionary holding tensors representing a batch of detections
and corresponding groundtruth annotations.
update_op: a tensorflow update op that will run for each batch along with
the tensors in tensor_dict.
summary_dir: a directory to write metrics summaries.
aggregated_result_processor: a function taking one argument:
1. result_lists: a dictionary with keys matching those in tensor_dict
and corresponding values being the list of results for each tensor
in tensor_dict. The length of each such list is num_batches.
    batch_processor: a function taking four arguments:
1. tensor_dict: the same tensor_dict that is passed in as the first
argument to this function.
2. sess: a tensorflow session
3. batch_index: an integer representing the index of the batch amongst
all batches
4. update_op: a tensorflow update op that will run for each batch.
and returns result_dict, a dictionary of results for that batch.
      By default, batch_processor is None, in which case the function runs:
        (result_dict, _) = sess.run([tensor_dict, update_op])
checkpoint_dirs: list of directories to load into a DetectionModel or an
EnsembleModel if restore_fn isn't set. Also used to determine when to run
next evaluation. Must have at least one element.
variables_to_restore: None, or a dictionary mapping variable names found in
a checkpoint to model variables. The dictionary would normally be
generated by creating a tf.train.ExponentialMovingAverage object and
calling its variables_to_restore() method. Not used if restore_fn is set.
restore_fn: a function that takes a tf.Session object and correctly restores
all necessary variables from the correct checkpoint file.
num_batches: the number of batches to use for evaluation.
eval_interval_secs: the number of seconds between each evaluation run.
max_number_of_evaluations: the max number of iterations of the evaluation.
If the value is left as None the evaluation continues indefinitely.
    master: the address of the TensorFlow master to use for the session.
save_graph: whether or not the Tensorflow graph is saved as a pbtxt file.
    save_graph_dir: where to save the Tensorflow graph on disk. If save_graph
      is True this must be non-empty.
    metric_names_to_values: a dictionary mapping metric names to tensors that
      will be evaluated after processing all batches of
      [tensor_dict, update_op]. If any metrics depend on statistics computed
      during each batch, ensure that the `update_op` tensor has a control
      dependency on the update ops that compute those statistics.
keys_to_exclude_from_results: keys in tensor_dict that will be excluded
from results_list. Note that the tensors corresponding to these keys will
still be evaluated for each batch, but won't be added to results_list.
Raises:
    ValueError: if max_number_of_evaluations is neither None nor a positive
      number.
ValueError: if checkpoint_dirs doesn't have at least one element.
"""
if max_number_of_evaluations and max_number_of_evaluations <= 0:
    raise ValueError(
        '`max_number_of_evaluations` must be either None or a positive '
        'number.')
if not checkpoint_dirs:
raise ValueError('`checkpoint_dirs` must have at least one entry.')
last_evaluated_model_path = None
number_of_evaluations = 0
while True:
start = time.time()
    logging.info('Starting evaluation at %s',
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
if not model_path:
logging.info('No model found in %s. Will try again in %d seconds',
checkpoint_dirs[0], eval_interval_secs)
elif model_path == last_evaluated_model_path:
logging.info('Found already evaluated checkpoint. Will try again in %d '
'seconds', eval_interval_secs)
else:
last_evaluated_model_path = model_path
run_checkpoint_once(tensor_dict, update_op, summary_dir,
aggregated_result_processor,
batch_processor, checkpoint_dirs,
variables_to_restore, restore_fn, num_batches, master,
save_graph, save_graph_dir, metric_names_to_values,
keys_to_exclude_from_results)
number_of_evaluations += 1
if (max_number_of_evaluations and
number_of_evaluations >= max_number_of_evaluations):
logging.info('Finished evaluation!')
break
time_to_next_eval = start + eval_interval_secs - time.time()
if time_to_next_eval > 0:
time.sleep(time_to_next_eval)
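# Usage sketch (illustrative only, not part of the original module): polling a
# hypothetical training directory for new checkpoints every five minutes and
# stopping after ten evaluations. All names and paths are placeholders.
def _example_repeated_checkpoint_run(tensor_dict, aggregator):
  repeated_checkpoint_run(tensor_dict,
                          update_op=tf.no_op(),
                          summary_dir='/tmp/eval',
                          aggregated_result_processor=aggregator,
                          checkpoint_dirs=['/tmp/train'],
                          num_batches=100,
                          eval_interval_secs=300,
                          max_number_of_evaluations=10)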
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Detection model evaluator.
This file provides a generic evaluation method that can be used to evaluate a
DetectionModel.
"""
import logging
import tensorflow as tf
from object_detection import eval_util
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import prefetcher
from object_detection.core import standard_fields as fields
from object_detection.utils import ops
slim = tf.contrib.slim
EVAL_METRICS_FN_DICT = {
'pascal_voc_metrics': eval_util.evaluate_detection_results_pascal_voc
}
def _extract_prediction_tensors(model,
create_input_dict_fn,
ignore_groundtruth=False):
"""Restores the model in a tensorflow session.
Args:
model: model to perform predictions with.
create_input_dict_fn: function to create input tensor dictionaries.
ignore_groundtruth: whether groundtruth should be ignored.
Returns:
    tensor_dict: a dictionary of detection tensors and, unless groundtruth is
      ignored, the corresponding groundtruth tensors.
"""
input_dict = create_input_dict_fn()
prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
input_dict = prefetch_queue.dequeue()
original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
preprocessed_image = model.preprocess(tf.to_float(original_image))
prediction_dict = model.predict(preprocessed_image)
detections = model.postprocess(prediction_dict)
original_image_shape = tf.shape(original_image)
absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
original_image_shape[1], original_image_shape[2])
label_id_offset = 1
tensor_dict = {
'original_image': original_image,
'image_id': input_dict[fields.InputDataFields.source_id],
'detection_boxes': absolute_detection_boxlist.get(),
'detection_scores': tf.squeeze(detections['detection_scores'], axis=0),
'detection_classes': (
tf.squeeze(detections['detection_classes'], axis=0) +
label_id_offset),
}
if 'detection_masks' in detections:
detection_masks = tf.squeeze(detections['detection_masks'],
axis=0)
detection_boxes = tf.squeeze(detections['detection_boxes'],
axis=0)
# TODO: This should be done in model's postprocess function ideally.
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks,
detection_boxes,
original_image_shape[1], original_image_shape[2])
detection_masks_reframed = tf.to_float(tf.greater(detection_masks_reframed,
0.5))
tensor_dict['detection_masks'] = detection_masks_reframed
# load groundtruth fields into tensor_dict
if not ignore_groundtruth:
normalized_gt_boxlist = box_list.BoxList(
input_dict[fields.InputDataFields.groundtruth_boxes])
gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
tf.shape(original_image)[1],
tf.shape(original_image)[2])
groundtruth_boxes = gt_boxlist.get()
groundtruth_classes = input_dict[fields.InputDataFields.groundtruth_classes]
tensor_dict['groundtruth_boxes'] = groundtruth_boxes
tensor_dict['groundtruth_classes'] = groundtruth_classes
tensor_dict['area'] = input_dict[fields.InputDataFields.groundtruth_area]
tensor_dict['is_crowd'] = input_dict[
fields.InputDataFields.groundtruth_is_crowd]
tensor_dict['difficult'] = input_dict[
fields.InputDataFields.groundtruth_difficult]
if 'detection_masks' in tensor_dict:
tensor_dict['groundtruth_instance_masks'] = input_dict[
fields.InputDataFields.groundtruth_instance_masks]
return tensor_dict
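# Usage sketch (illustrative only, not part of the original module): building
# the evaluation tensor dictionary from hypothetical factory callables, the
# same way evaluate() below wires it up.
def _example_build_tensor_dict(create_model_fn, create_input_dict_fn):
  tensor_dict = _extract_prediction_tensors(
      model=create_model_fn(),
      create_input_dict_fn=create_input_dict_fn,
      ignore_groundtruth=False)
  # Detection keys are always present; the groundtruth keys
  # ('groundtruth_boxes', 'groundtruth_classes', 'area', 'is_crowd',
  # 'difficult') are added because ignore_groundtruth is False.
  return tensor_dict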
def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
checkpoint_dir, eval_dir):
"""Evaluation function for detection models.
Args:
create_input_dict_fn: a function to create a tensor input dictionary.
create_model_fn: a function that creates a DetectionModel.
    eval_config: an eval_pb2.EvalConfig protobuf.
categories: a list of category dictionaries. Each dict in the list should
have an integer 'id' field and string 'name' field.
checkpoint_dir: directory to load the checkpoints to evaluate from.
eval_dir: directory to write evaluation metrics summary to.
"""
model = create_model_fn()
if eval_config.ignore_groundtruth and not eval_config.export_path:
logging.fatal('If ignore_groundtruth=True then an export_path is '
'required. Aborting!!!')
tensor_dict = _extract_prediction_tensors(
model=model,
create_input_dict_fn=create_input_dict_fn,
ignore_groundtruth=eval_config.ignore_groundtruth)
def _process_batch(tensor_dict, sess, batch_index, counters, update_op):
"""Evaluates tensors in tensor_dict, visualizing the first K examples.
This function calls sess.run on tensor_dict, evaluating the original_image
tensor only on the first K examples and visualizing detections overlaid
on this original_image.
Args:
tensor_dict: a dictionary of tensors
sess: tensorflow session
batch_index: the index of the batch amongst all batches in the run.
counters: a dictionary holding 'success' and 'skipped' fields which can
be updated to keep track of number of successful and failed runs,
respectively. If these fields are not updated, then the success/skipped
counter values shown at the end of evaluation will be incorrect.
update_op: An update op that has to be run along with output tensors. For
example this could be an op to compute statistics for slim metrics.
Returns:
result_dict: a dictionary of numpy arrays
"""
if batch_index >= eval_config.num_visualizations:
if 'original_image' in tensor_dict:
        tensor_dict = {k: v for (k, v) in tensor_dict.items()
                       if k != 'original_image'}
try:
(result_dict, _) = sess.run([tensor_dict, update_op])
counters['success'] += 1
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
counters['skipped'] += 1
return {}
global_step = tf.train.global_step(sess, slim.get_global_step())
if batch_index < eval_config.num_visualizations:
tag = 'image-{}'.format(batch_index)
eval_util.visualize_detection_results(
result_dict, tag, global_step, categories=categories,
summary_dir=eval_dir,
export_dir=eval_config.visualization_export_dir,
          # Only show groundtruth when visualizations are being exported.
          show_groundtruth=bool(eval_config.visualization_export_dir))
return result_dict
def _process_aggregated_results(result_lists):
eval_metric_fn_key = eval_config.metrics_set
if eval_metric_fn_key not in EVAL_METRICS_FN_DICT:
raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
return EVAL_METRICS_FN_DICT[eval_metric_fn_key](result_lists,
categories=categories)
variables_to_restore = tf.global_variables()
global_step = slim.get_or_create_global_step()
variables_to_restore.append(global_step)
if eval_config.use_moving_averages:
variable_averages = tf.train.ExponentialMovingAverage(0.0)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
def _restore_latest_checkpoint(sess):
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
saver.restore(sess, latest_checkpoint)
eval_util.repeated_checkpoint_run(
tensor_dict=tensor_dict,
update_op=tf.no_op(),
summary_dir=eval_dir,
aggregated_result_processor=_process_aggregated_results,
batch_processor=_process_batch,
checkpoint_dirs=[checkpoint_dir],
variables_to_restore=None,
restore_fn=_restore_latest_checkpoint,
num_batches=eval_config.num_examples,
eval_interval_secs=eval_config.eval_interval_secs,
max_number_of_evaluations=(
1 if eval_config.ignore_groundtruth else
eval_config.max_evals if eval_config.max_evals else
None),
master=eval_config.eval_master,
save_graph=eval_config.save_graph,
save_graph_dir=(eval_dir if eval_config.save_graph else ''))
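# Usage sketch (illustrative only, not part of the original module): callers
# typically bind the model builder with functools.partial before invoking
# evaluate(). `model_config`, `eval_config`, `create_input_dict_fn` and
# `categories` stand in for objects parsed from a pipeline config; the
# directories are hypothetical.
def _example_evaluate(model_config, eval_config, create_input_dict_fn,
                      categories):
  import functools
  from object_detection.builders import model_builder
  create_model_fn = functools.partial(model_builder.build, model_config,
                                      is_training=False)
  evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
           checkpoint_dir='/tmp/train', eval_dir='/tmp/eval')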
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Tool to export an object detection model for inference.
Prepares an object detection tensorflow graph for inference using model
configuration and an optional trained checkpoint.
The inference graph contains one of two input nodes depending on the user
specified option.
* `image_tensor`: Accepts a uint8 4-D tensor of shape [1, None, None, 3]
* `tf_example`: Accepts a serialized tf.Example proto. The batch size in this
  case is always 1.
and the following output nodes:
* `num_detections` : Outputs float32 tensors of the form [batch]
that specifies the number of valid boxes per image in the batch.
* `detection_boxes` : Outputs float32 tensors of the form
[batch, num_boxes, 4] containing detected boxes.
* `detection_scores` : Outputs float32 tensors of the form
[batch, num_boxes] containing class scores for the detections.
* `detection_classes`: Outputs float32 tensors of the form
[batch, num_boxes] containing classes for the detections.
Note that currently `batch` is always 1, but we will support `batch` > 1 in
the future.
Optionally, one can freeze the graph by converting the weights in the provided
checkpoint into graph constants, thereby eliminating the need for a checkpoint
file during inference.
Note that this tool uses `use_moving_averages` from eval_config to decide
which weights to freeze.
Example Usage:
--------------
python export_inference_graph.py \
--input_type image_tensor \
--pipeline_config_path path/to/ssd_inception_v2.config \
--checkpoint_path path/to/model-ckpt \
--inference_graph_path path/to/inference_graph.pb
"""
import tensorflow as tf
from google.protobuf import text_format
from object_detection import exporter
from object_detection.protos import pipeline_pb2
slim = tf.contrib.slim
flags = tf.app.flags
flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
                    'one of [`image_tensor`, `tf_example`]')
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file.')
flags.DEFINE_string('checkpoint_path', '', 'Optional path to checkpoint file. '
'If provided, bakes the weights from the checkpoint into '
'the graph.')
flags.DEFINE_string('inference_graph_path', '', 'Path to write the output '
'inference graph.')
FLAGS = flags.FLAGS
def main(_):
assert FLAGS.pipeline_config_path, 'TrainEvalPipelineConfig missing.'
assert FLAGS.inference_graph_path, 'Inference graph path missing.'
assert FLAGS.input_type, 'Input type missing.'
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
exporter.export_inference_graph(FLAGS.input_type, pipeline_config,
FLAGS.checkpoint_path,
FLAGS.inference_graph_path)
if __name__ == '__main__':
tf.app.run()
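# Usage sketch (illustrative only, not part of this tool): loading the frozen
# graph written by this script and fetching the documented output nodes by
# name. The graph path and the all-zeros test image are hypothetical.
def _example_run_exported_graph(graph_path='/tmp/inference_graph.pb'):
  import numpy as np
  graph_def = tf.GraphDef()
  with tf.gfile.GFile(graph_path, 'rb') as fid:
    graph_def.ParseFromString(fid.read())
  graph = tf.Graph()
  with graph.as_default():
    tf.import_graph_def(graph_def, name='')
  with tf.Session(graph=graph) as sess:
    image = np.zeros((1, 64, 64, 3), dtype=np.uint8)
    # Tensor names follow the output nodes documented in the module docstring.
    return sess.run(
        ['detection_boxes:0', 'detection_scores:0',
         'detection_classes:0', 'num_detections:0'],
        feed_dict={'image_tensor:0': image})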
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to export object detection inference graph."""
import logging
import os
import tensorflow as tf
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.client import session
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import importer
from tensorflow.python.platform import gfile
from tensorflow.python.training import saver as saver_lib
from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
slim = tf.contrib.slim
# TODO: Replace with freeze_graph.freeze_graph_with_def_protos when newer
# version of Tensorflow becomes more common.
def freeze_graph_with_def_protos(
input_graph_def,
input_saver_def,
input_checkpoint,
output_node_names,
restore_op_name,
filename_tensor_name,
output_graph,
clear_devices,
initializer_nodes,
variable_names_blacklist=''):
"""Converts all variables in a graph and checkpoint into constants."""
del restore_op_name, filename_tensor_name # Unused by updated loading code.
# 'input_checkpoint' may be a prefix if we're using Saver V2 format
if not saver_lib.checkpoint_exists(input_checkpoint):
    logging.error('Input checkpoint "%s" does not exist!', input_checkpoint)
return -1
if not output_node_names:
    logging.error('You must supply the name of at least one output node in '
                  'output_node_names.')
return -1
# Remove all the explicit device specifications for this node. This helps to
# make the graph more portable.
if clear_devices:
for node in input_graph_def.node:
node.device = ''
_ = importer.import_graph_def(input_graph_def, name='')
with session.Session() as sess:
if input_saver_def:
saver = saver_lib.Saver(saver_def=input_saver_def)
saver.restore(sess, input_checkpoint)
else:
var_list = {}
reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint)
var_to_shape_map = reader.get_variable_to_shape_map()
for key in var_to_shape_map:
try:
tensor = sess.graph.get_tensor_by_name(key + ':0')
except KeyError:
# This tensor doesn't exist in the graph (for example it's
# 'global_step' or a similar housekeeping element) so skip it.
continue
var_list[key] = tensor
saver = saver_lib.Saver(var_list=var_list)
saver.restore(sess, input_checkpoint)
if initializer_nodes:
sess.run(initializer_nodes)
variable_names_blacklist = (variable_names_blacklist.split(',') if
variable_names_blacklist else None)
output_graph_def = graph_util.convert_variables_to_constants(
sess,
input_graph_def,
output_node_names.split(','),
variable_names_blacklist=variable_names_blacklist)
with gfile.GFile(output_graph, 'wb') as f:
f.write(output_graph_def.SerializeToString())
logging.info('%d ops in the final graph.', len(output_graph_def.node))
# TODO: Support batch tf example inputs.
def _tf_example_input_placeholder():
tf_example_placeholder = tf.placeholder(
tf.string, shape=[], name='tf_example')
tensor_dict = tf_example_decoder.TfExampleDecoder().Decode(
tf_example_placeholder)
image = tensor_dict[fields.InputDataFields.image]
return tf.expand_dims(image, axis=0)
def _image_tensor_input_placeholder():
return tf.placeholder(dtype=tf.uint8,
shape=(1, None, None, 3),
name='image_tensor')
input_placeholder_fn_map = {
'tf_example': _tf_example_input_placeholder,
'image_tensor': _image_tensor_input_placeholder
}
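# Usage sketch (illustrative only, not part of the original module): building
# a serialized tf.Example that the `tf_example` placeholder above accepts.
# `encoded_jpeg` is a hypothetical string of JPEG-encoded image bytes.
def _example_serialized_tf_example(encoded_jpeg):
  def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
  return tf.train.Example(features=tf.train.Features(feature={
      'image/encoded': _bytes_feature(encoded_jpeg),
      'image/format': _bytes_feature('jpg'),
      'image/source_id': _bytes_feature('image_id'),
  })).SerializeToString()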
def _add_output_tensor_nodes(postprocessed_tensors):
"""Adds output nodes for detection boxes and scores.
Adds the following nodes for output tensors -
* num_detections: float32 tensor of shape [batch_size].
* detection_boxes: float32 tensor of shape [batch_size, num_boxes, 4]
containing detected boxes.
* detection_scores: float32 tensor of shape [batch_size, num_boxes]
containing scores for the detected boxes.
* detection_classes: float32 tensor of shape [batch_size, num_boxes]
containing class predictions for the detected boxes.
Args:
postprocessed_tensors: a dictionary containing the following fields
'detection_boxes': [batch, max_detections, 4]
'detection_scores': [batch, max_detections]
'detection_classes': [batch, max_detections]
'num_detections': [batch]
"""
label_id_offset = 1
boxes = postprocessed_tensors.get('detection_boxes')
scores = postprocessed_tensors.get('detection_scores')
classes = postprocessed_tensors.get('detection_classes') + label_id_offset
num_detections = postprocessed_tensors.get('num_detections')
tf.identity(boxes, name='detection_boxes')
tf.identity(scores, name='detection_scores')
tf.identity(classes, name='detection_classes')
tf.identity(num_detections, name='num_detections')
def _write_inference_graph(inference_graph_path,
checkpoint_path=None,
use_moving_averages=False,
output_node_names=(
'num_detections,detection_scores,'
'detection_boxes,detection_classes')):
"""Writes inference graph to disk with the option to bake in weights.
  If checkpoint_path is not None, the weights from the checkpoint are baked
  into the graph, thereby eliminating the need for checkpoint files during
  inference. If the model was trained with moving averages, setting
  use_moving_averages to True restores the moving averages; otherwise the
  original set of variables is restored.
Args:
inference_graph_path: Path to write inference graph.
checkpoint_path: Optional path to the checkpoint file.
use_moving_averages: Whether to export the original or the moving averages
of the trainable variables from the checkpoint.
output_node_names: Output tensor names, defaults are: num_detections,
detection_scores, detection_boxes, detection_classes.
"""
inference_graph_def = tf.get_default_graph().as_graph_def()
if checkpoint_path:
saver = None
if use_moving_averages:
variable_averages = tf.train.ExponentialMovingAverage(0.0)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
else:
saver = tf.train.Saver()
freeze_graph_with_def_protos(
input_graph_def=inference_graph_def,
input_saver_def=saver.as_saver_def(),
input_checkpoint=checkpoint_path,
output_node_names=output_node_names,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
output_graph=inference_graph_path,
clear_devices=True,
initializer_nodes='')
return
tf.train.write_graph(inference_graph_def,
os.path.dirname(inference_graph_path),
os.path.basename(inference_graph_path),
as_text=False)
def _export_inference_graph(input_type,
detection_model,
use_moving_averages,
checkpoint_path,
inference_graph_path):
if input_type not in input_placeholder_fn_map:
raise ValueError('Unknown input type: {}'.format(input_type))
inputs = tf.to_float(input_placeholder_fn_map[input_type]())
preprocessed_inputs = detection_model.preprocess(inputs)
output_tensors = detection_model.predict(preprocessed_inputs)
postprocessed_tensors = detection_model.postprocess(output_tensors)
_add_output_tensor_nodes(postprocessed_tensors)
_write_inference_graph(inference_graph_path, checkpoint_path,
use_moving_averages)
def export_inference_graph(input_type, pipeline_config, checkpoint_path,
inference_graph_path):
"""Exports inference graph for the model specified in the pipeline config.
Args:
input_type: Type of input for the graph. Can be one of [`image_tensor`,
`tf_example`].
    pipeline_config: pipeline_pb2.TrainEvalPipelineConfig proto.
checkpoint_path: Path to the checkpoint file to freeze.
inference_graph_path: Path to write inference graph to.
"""
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
_export_inference_graph(input_type, detection_model,
pipeline_config.eval_config.use_moving_averages,
checkpoint_path, inference_graph_path)
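# Usage sketch (illustrative only, not part of the original module): invoking
# the exporter programmatically, mirroring what the export_inference_graph
# binary above does. The config and checkpoint paths are hypothetical.
def _example_export(pipeline_config_path='/tmp/pipeline.config',
                    checkpoint_path='/tmp/model.ckpt',
                    inference_graph_path='/tmp/inference_graph.pb'):
  from google.protobuf import text_format
  from object_detection.protos import pipeline_pb2
  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
  with tf.gfile.GFile(pipeline_config_path, 'r') as f:
    text_format.Merge(f.read(), pipeline_config)
  export_inference_graph('image_tensor', pipeline_config, checkpoint_path,
                         inference_graph_path)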
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.export_inference_graph."""
import os
import mock
import numpy as np
import tensorflow as tf
from object_detection import exporter
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.protos import pipeline_pb2
class FakeModel(model.DetectionModel):
def preprocess(self, inputs):
return (tf.identity(inputs) *
tf.get_variable('dummy', shape=(),
initializer=tf.constant_initializer(2),
dtype=tf.float32))
def predict(self, preprocessed_inputs):
return {'image': tf.identity(preprocessed_inputs)}
def postprocess(self, prediction_dict):
with tf.control_dependencies(prediction_dict.values()):
return {
'detection_boxes': tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]], tf.float32),
'detection_scores': tf.constant([[0.7, 0.6]], tf.float32),
'detection_classes': tf.constant([[0, 1]], tf.float32),
'num_detections': tf.constant([2], tf.float32)
}
def restore_fn(self, checkpoint_path, from_detection_checkpoint):
pass
def loss(self, prediction_dict):
pass
class ExportInferenceGraphTest(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self, checkpoint_path,
use_moving_averages):
g = tf.Graph()
with g.as_default():
mock_model = FakeModel(num_classes=1)
mock_model.preprocess(tf.constant([1, 3, 4, 3], tf.float32))
if use_moving_averages:
tf.train.ExponentialMovingAverage(0.0).apply()
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init)
saver.save(sess, checkpoint_path)
def _load_inference_graph(self, inference_graph_path):
od_graph = tf.Graph()
with od_graph.as_default():
od_graph_def = tf.GraphDef()
      with tf.gfile.GFile(inference_graph_path, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
return od_graph
def _create_tf_example(self, image_array):
with self.test_session():
encoded_image = tf.image.encode_jpeg(tf.constant(image_array)).eval()
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': _bytes_feature(encoded_image),
'image/format': _bytes_feature('jpg'),
'image/source_id': _bytes_feature('image_id')
})).SerializeToString()
return example
def test_export_graph_with_image_tensor_input(self):
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=1)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pbtxt')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=None,
inference_graph_path=inference_graph_path)
def test_export_graph_with_tf_example_input(self):
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=1)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pbtxt')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='tf_example',
pipeline_config=pipeline_config,
checkpoint_path=None,
inference_graph_path=inference_graph_path)
def test_export_frozen_graph(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=1)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
def test_export_frozen_graph_with_moving_averages(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=True)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=1)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = True
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
def test_export_and_run_inference_with_image_tensor(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=1)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph) as sess:
image_tensor = inference_graph.get_tensor_by_name('image_tensor:0')
boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
scores = inference_graph.get_tensor_by_name('detection_scores:0')
classes = inference_graph.get_tensor_by_name('detection_classes:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: np.ones((1, 4, 4, 3)).astype(np.uint8)})
self.assertAllClose(boxes, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores, [[0.7, 0.6]])
self.assertAllClose(classes, [[1, 2]])
self.assertAllClose(num_detections, [2])
def test_export_and_run_inference_with_tf_example(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=1)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='tf_example',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph) as sess:
tf_example = inference_graph.get_tensor_by_name('tf_example:0')
boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
scores = inference_graph.get_tensor_by_name('detection_scores:0')
classes = inference_graph.get_tensor_by_name('detection_classes:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={tf_example: self._create_tf_example(
np.ones((4, 4, 3)).astype(np.uint8))})
self.assertAllClose(boxes, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores, [[0.7, 0.6]])
self.assertAllClose(classes, [[1, 2]])
self.assertAllClose(num_detections, [2])
if __name__ == '__main__':
tf.test.main()