Commit 27b4acd4 authored by Aman Gupta

Merge remote-tracking branch 'upstream/master'

parents 5133522f d4e1f97f
@@ -14,8 +14,12 @@
# ==============================================================================
"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
+import functools
+from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.builders import box_predictor_builder
@@ -23,11 +27,14 @@ from object_detection.builders import hyperparams_builder
from object_detection.builders import post_processing_builder
from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import losses
+from object_detection.core import post_processing
from object_detection.core import target_assigner
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
from object_detection.protos import post_processing_pb2
+from object_detection.utils import ops
+from object_detection.utils import test_case
from object_detection.utils import test_utils
slim = tf.contrib.slim
@@ -60,7 +67,7 @@ class FakeFasterRCNNFeatureExtractor(
num_outputs=3, kernel_size=1, scope='layer2')
-class FasterRCNNMetaArchTestBase(tf.test.TestCase):
+class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
"""Base class to test Faster R-CNN and R-FCN meta architectures."""
def _build_arg_scope_with_hyperparams(self,
@@ -157,7 +164,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
masks_are_class_agnostic=False,
use_matmul_crop_and_resize=False,
clip_anchors_to_image=False,
-use_matmul_gather_in_matcher=False):
+use_matmul_gather_in_matcher=False,
+use_static_shapes=False):
def image_resizer_fn(image, masks=None):
"""Fake image resizer function."""
@@ -220,11 +228,18 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
first_stage_box_predictor_depth = 512
first_stage_minibatch_size = 3
first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
-positive_fraction=0.5, is_static=False)
+positive_fraction=0.5, is_static=use_static_shapes)
first_stage_nms_score_threshold = -1.0
first_stage_nms_iou_threshold = 1.0
first_stage_max_proposals = first_stage_max_proposals
+first_stage_non_max_suppression_fn = functools.partial(
+post_processing.batch_multiclass_non_max_suppression,
+score_thresh=first_stage_nms_score_threshold,
+iou_thresh=first_stage_nms_iou_threshold,
+max_size_per_class=first_stage_max_proposals,
+max_total_size=first_stage_max_proposals,
+use_static_shapes=use_static_shapes)
first_stage_localization_loss_weight = 1.0
first_stage_objectness_loss_weight = 1.0
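Note (not part of this commit): once bound with functools.partial as above, the first-stage NMS callable only needs boxes, scores and an optional clip window at call time; everything else is already fixed. A minimal sketch, assuming the object_detection package at this revision is importable; the shapes and threshold values below are illustrative, and the six-element unpacking mirrors how ssd_meta_arch.postprocess (shown later in this diff) consumes the same callable:

import functools
import tensorflow as tf
from object_detection.core import post_processing

nms_fn = functools.partial(
    post_processing.batch_multiclass_non_max_suppression,
    score_thresh=-1.0, iou_thresh=1.0,
    max_size_per_class=8, max_total_size=8)
# Class-agnostic proposals: [batch, num_anchors, q=1, 4] boxes and
# [batch, num_anchors, num_classes=1] scores.
boxes = tf.zeros([2, 10, 1, 4], dtype=tf.float32)
scores = tf.random_uniform([2, 10, 1])
(nmsed_boxes, nmsed_scores, nmsed_classes, _, _,
 num_detections) = nms_fn(boxes, scores)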
@@ -246,7 +261,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
second_stage_non_max_suppression_fn, _ = post_processing_builder.build(
post_processing_config)
second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
-positive_fraction=1.0, is_static=False)
+positive_fraction=1.0, is_static=use_static_shapes)
second_stage_score_conversion_fn = tf.identity
second_stage_localization_loss_weight = 1.0
@@ -268,6 +283,9 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
loc_loss_weight=second_stage_localization_loss_weight,
max_negatives_per_positive=None)
+crop_and_resize_fn = (
+ops.matmul_crop_and_resize
+if use_matmul_crop_and_resize else ops.native_crop_and_resize)
common_kwargs = {
'is_training': is_training,
'num_classes': num_classes,
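Note (not part of this commit): the boolean use_matmul_crop_and_resize flag now only selects which crop-and-resize callable is handed to the meta architecture, which then applies it to the RPN feature map and the normalized proposal boxes. A rough sketch of that contract; the call signature (batched boxes, positional crop size) is an assumption about the ops module at this revision and the crop size is illustrative:

import tensorflow as tf
from object_detection.utils import ops

features = tf.zeros([2, 32, 32, 3])  # [batch, height, width, depth]
boxes = tf.constant([[[0., 0., .5, .5]],
                     [[.25, .25, 1., 1.]]])  # [batch, num_boxes, 4], normalized
# One fixed-size crop per box; matmul_crop_and_resize is the fully
# matmul-based, statically shaped alternative with the same interface.
crops = ops.native_crop_and_resize(features, boxes, [14, 14])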
@@ -284,8 +302,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
'first_stage_minibatch_size': first_stage_minibatch_size,
'first_stage_sampler': first_stage_sampler,
-'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
-'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
+'first_stage_non_max_suppression_fn':
+first_stage_non_max_suppression_fn,
'first_stage_max_proposals': first_stage_max_proposals,
'first_stage_localization_loss_weight':
first_stage_localization_loss_weight,
@@ -304,8 +322,10 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'second_stage_classification_loss':
second_stage_classification_loss,
'hard_example_miner': hard_example_miner,
-'use_matmul_crop_and_resize': use_matmul_crop_and_resize,
-'clip_anchors_to_image': clip_anchors_to_image
+'crop_and_resize_fn': crop_and_resize_fn,
+'clip_anchors_to_image': clip_anchors_to_image,
+'use_static_shapes': use_static_shapes,
+'resize_masks': True,
}
return self._get_model(
@@ -412,7 +432,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
anchors = prediction_out['anchors']
self.assertTrue(len(anchors.shape) == 2 and anchors.shape[1] == 4)
num_anchors_out = anchors.shape[0]
-self.assertTrue(num_anchors_out < num_anchors_strict_upper_bound)
+self.assertLess(num_anchors_out, num_anchors_strict_upper_bound)
self.assertTrue(np.all(np.greater_equal(anchors, 0)))
self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
@@ -484,94 +504,97 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
for key in expected_shapes:
self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
-def _test_predict_gives_correct_shapes_in_train_mode_both_stages(
-self, use_matmul_crop_and_resize=False,
-clip_anchors_to_image=False):
-test_graph = tf.Graph()
-with test_graph.as_default():
+def test_predict_gives_correct_shapes_in_train_mode_both_stages(
+self,
+use_static_shapes=False):
+batch_size = 2
+image_size = 10
+max_num_proposals = 7
+initial_crop_size = 3
+maxpool_stride = 1
+def graph_fn(images, gt_boxes, gt_classes, gt_weights):
+"""Function to construct tf graph for the test."""
model = self._build_model(
is_training=True,
number_of_stages=2,
second_stage_batch_size=7,
predict_masks=False,
-use_matmul_crop_and_resize=use_matmul_crop_and_resize,
-clip_anchors_to_image=clip_anchors_to_image)
-batch_size = 2
-image_size = 10
-max_num_proposals = 7
-initial_crop_size = 3
-maxpool_stride = 1
-image_shape = (batch_size, image_size, image_size, 3)
-preprocessed_inputs = tf.zeros(image_shape, dtype=tf.float32)
-groundtruth_boxes_list = [
-tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
-tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
-groundtruth_classes_list = [
-tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
-tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
-groundtruth_weights_list = [
-tf.constant([1, 1], dtype=tf.float32),
-tf.constant([1, 1], dtype=tf.float32)]
-_, true_image_shapes = model.preprocess(tf.zeros(image_shape))
+use_matmul_crop_and_resize=use_static_shapes,
+clip_anchors_to_image=use_static_shapes,
+use_static_shapes=use_static_shapes)
+preprocessed_inputs, true_image_shapes = model.preprocess(images)
model.provide_groundtruth(
-groundtruth_boxes_list,
-groundtruth_classes_list,
-groundtruth_weights_list=groundtruth_weights_list)
+groundtruth_boxes_list=tf.unstack(gt_boxes),
+groundtruth_classes_list=tf.unstack(gt_classes),
+groundtruth_weights_list=tf.unstack(gt_weights))
result_tensor_dict = model.predict(preprocessed_inputs, true_image_shapes)
-expected_shapes = {
-'rpn_box_predictor_features':
-(2, image_size, image_size, 512),
-'rpn_features_to_crop': (2, image_size, image_size, 3),
-'image_shape': (4,),
-'refined_box_encodings': (2 * max_num_proposals, 2, 4),
-'class_predictions_with_background': (2 * max_num_proposals, 2 + 1),
-'num_proposals': (2,),
-'proposal_boxes': (2, max_num_proposals, 4),
-'proposal_boxes_normalized': (2, max_num_proposals, 4),
-'box_classifier_features':
-self._get_box_classifier_features_shape(image_size,
-batch_size,
-max_num_proposals,
-initial_crop_size,
-maxpool_stride,
-3)
-}
-init_op = tf.global_variables_initializer()
-with self.test_session(graph=test_graph) as sess:
-sess.run(init_op)
-tensor_dict_out = sess.run(result_tensor_dict)
-self.assertEqual(set(tensor_dict_out.keys()),
-set(expected_shapes.keys()).union(set([
-'rpn_box_encodings',
-'rpn_objectness_predictions_with_background',
-'anchors'])))
-for key in expected_shapes:
-self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
-anchors_shape_out = tensor_dict_out['anchors'].shape
-self.assertEqual(2, len(anchors_shape_out))
-self.assertEqual(4, anchors_shape_out[1])
-num_anchors_out = anchors_shape_out[0]
-self.assertAllEqual(tensor_dict_out['rpn_box_encodings'].shape,
-(2, num_anchors_out, 4))
-self.assertAllEqual(
-tensor_dict_out['rpn_objectness_predictions_with_background'].shape,
-(2, num_anchors_out, 2))
-def test_predict_gives_correct_shapes_in_train_mode_both_stages(self):
-self._test_predict_gives_correct_shapes_in_train_mode_both_stages()
-def test_predict_gives_correct_shapes_in_train_mode_matmul_crop_resize(self):
-self._test_predict_gives_correct_shapes_in_train_mode_both_stages(
-use_matmul_crop_and_resize=True)
-def test_predict_gives_correct_shapes_in_train_mode_clip_anchors(self):
-self._test_predict_gives_correct_shapes_in_train_mode_both_stages(
-clip_anchors_to_image=True)
+return (result_tensor_dict['refined_box_encodings'],
+result_tensor_dict['class_predictions_with_background'],
+result_tensor_dict['proposal_boxes'],
+result_tensor_dict['proposal_boxes_normalized'],
+result_tensor_dict['anchors'],
+result_tensor_dict['rpn_box_encodings'],
+result_tensor_dict['rpn_objectness_predictions_with_background'],
+result_tensor_dict['rpn_features_to_crop'],
+result_tensor_dict['rpn_box_predictor_features'],
+)
+image_shape = (batch_size, image_size, image_size, 3)
+images = np.zeros(image_shape, dtype=np.float32)
+gt_boxes = np.stack([
+np.array([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=np.float32),
+np.array([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=np.float32)
+])
+gt_classes = np.stack([
+np.array([[1, 0], [0, 1]], dtype=np.float32),
+np.array([[1, 0], [1, 0]], dtype=np.float32)
+])
+gt_weights = np.stack([
+np.array([1, 1], dtype=np.float32),
+np.array([1, 1], dtype=np.float32)
+])
+if use_static_shapes:
+results = self.execute(graph_fn,
+[images, gt_boxes, gt_classes, gt_weights])
+else:
+results = self.execute_cpu(graph_fn,
+[images, gt_boxes, gt_classes, gt_weights])
+expected_shapes = {
+'rpn_box_predictor_features': (2, image_size, image_size, 512),
+'rpn_features_to_crop': (2, image_size, image_size, 3),
+'refined_box_encodings': (2 * max_num_proposals, 2, 4),
+'class_predictions_with_background': (2 * max_num_proposals, 2 + 1),
+'proposal_boxes': (2, max_num_proposals, 4),
+'rpn_box_encodings': (2, image_size * image_size * 9, 4),
+'proposal_boxes_normalized': (2, max_num_proposals, 4),
+'box_classifier_features':
+self._get_box_classifier_features_shape(
+image_size, batch_size, max_num_proposals, initial_crop_size,
+maxpool_stride, 3),
+'rpn_objectness_predictions_with_background':
+(2, image_size * image_size * 9, 2)
+}
+# TODO(rathodv): Possibly change utils/test_case.py to accept dictionaries
+# and return dictionaries so we don't have to rely on the order of tensors.
+self.assertAllEqual(results[0].shape,
+expected_shapes['refined_box_encodings'])
+self.assertAllEqual(results[1].shape,
+expected_shapes['class_predictions_with_background'])
+self.assertAllEqual(results[2].shape, expected_shapes['proposal_boxes'])
+self.assertAllEqual(results[3].shape,
+expected_shapes['proposal_boxes_normalized'])
+anchors_shape = results[4].shape
+self.assertAllEqual(results[5].shape,
+[batch_size, anchors_shape[0], 4])
+self.assertAllEqual(results[6].shape,
+[batch_size, anchors_shape[0], 2])
+self.assertAllEqual(results[7].shape,
+expected_shapes['rpn_features_to_crop'])
+self.assertAllEqual(results[8].shape,
+expected_shapes['rpn_box_predictor_features'])
def _test_postprocess_first_stage_only_inference_mode(
self, pad_to_max_dimension=None):
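Note (not part of this commit): the rewritten test follows the graph_fn pattern from object_detection.utils.test_case, where a function builds the graph from input tensors and execute_cpu (or execute, which targets accelerator/static-shape execution when available) feeds numpy arrays and returns numpy results. A minimal, self-contained sketch of that pattern, assuming utils/test_case.py at this revision exposes execute and execute_cpu with this signature:

import numpy as np
import tensorflow as tf
from object_detection.utils import test_case

class GraphFnPatternTest(test_case.TestCase):

  def test_add_and_multiply(self):
    def graph_fn(a, b):
      # Tensors returned here are fetched and handed back as numpy arrays.
      return tf.add(a, b), tf.multiply(a, b)
    a = np.array([1., 2.], dtype=np.float32)
    b = np.array([3., 4.], dtype=np.float32)
    sums, products = self.execute_cpu(graph_fn, [a, b])
    self.assertAllClose(sums, [4., 6.])
    self.assertAllClose(products, [3., 8.])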
@@ -848,10 +871,10 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
loss_dict_out = sess.run(loss_dict)
self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0)
self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0)
-self.assertTrue('Loss/BoxClassifierLoss/localization_loss'
-not in loss_dict_out)
-self.assertTrue('Loss/BoxClassifierLoss/classification_loss'
-not in loss_dict_out)
+self.assertNotIn('Loss/BoxClassifierLoss/localization_loss',
+loss_dict_out)
+self.assertNotIn('Loss/BoxClassifierLoss/classification_loss',
+loss_dict_out)
# TODO(rathodv): Split test into two - with and without masks.
def test_loss_full(self):
@@ -1157,22 +1180,58 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'Loss/BoxClassifierLoss/classification_loss'], 0)
self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0)
-def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(self):
-model = self._build_model(
-is_training=True, number_of_stages=2, second_stage_batch_size=6)
+def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(
+self, use_static_shapes=False, shared_boxes=False):
batch_size = 2
-anchors = tf.constant(
+first_stage_max_proposals = 8
+second_stage_batch_size = 6
+num_classes = 2
+def graph_fn(anchors, rpn_box_encodings,
+rpn_objectness_predictions_with_background, images,
+num_proposals, proposal_boxes, refined_box_encodings,
+class_predictions_with_background, groundtruth_boxes,
+groundtruth_classes):
+"""Function to construct tf graph for the test."""
+model = self._build_model(
+is_training=True, number_of_stages=2,
+second_stage_batch_size=second_stage_batch_size,
+first_stage_max_proposals=first_stage_max_proposals,
+num_classes=num_classes,
+use_matmul_crop_and_resize=use_static_shapes,
+clip_anchors_to_image=use_static_shapes,
+use_static_shapes=use_static_shapes)
+prediction_dict = {
+'rpn_box_encodings': rpn_box_encodings,
+'rpn_objectness_predictions_with_background':
+rpn_objectness_predictions_with_background,
+'image_shape': tf.shape(images),
+'anchors': anchors,
+'refined_box_encodings': refined_box_encodings,
+'class_predictions_with_background':
+class_predictions_with_background,
+'proposal_boxes': proposal_boxes,
+'num_proposals': num_proposals
+}
+_, true_image_shapes = model.preprocess(images)
+model.provide_groundtruth(tf.unstack(groundtruth_boxes),
+tf.unstack(groundtruth_classes))
+loss_dict = model.loss(prediction_dict, true_image_shapes)
+return (loss_dict['Loss/RPNLoss/localization_loss'],
+loss_dict['Loss/RPNLoss/objectness_loss'],
+loss_dict['Loss/BoxClassifierLoss/localization_loss'],
+loss_dict['Loss/BoxClassifierLoss/classification_loss'])
+anchors = np.array(
[[0, 0, 16, 16],
[0, 16, 16, 32],
[16, 0, 32, 16],
-[16, 16, 32, 32]], dtype=tf.float32)
-rpn_box_encodings = tf.zeros(
-[batch_size,
-anchors.get_shape().as_list()[0],
-BOX_CODE_SIZE], dtype=tf.float32)
+[16, 16, 32, 32]], dtype=np.float32)
+rpn_box_encodings = np.zeros(
+[batch_size, anchors.shape[1], BOX_CODE_SIZE], dtype=np.float32)
# use different numbers for the objectness category to break ties in
# order of boxes returned by NMS
-rpn_objectness_predictions_with_background = tf.constant(
+rpn_objectness_predictions_with_background = np.array(
[[[-10, 13],
[10, -10],
[10, -11],
@@ -1180,13 +1239,13 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
[[-10, 13],
[10, -10],
[10, -11],
-[10, -12]]], dtype=tf.float32)
-image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
+[10, -12]]], dtype=np.float32)
+images = np.zeros([batch_size, 32, 32, 3], dtype=np.float32)
# box_classifier_batch_size is 6, but here we assume that the number of
# actual proposals (not counting zero paddings) is fewer.
-num_proposals = tf.constant([3, 2], dtype=tf.int32)
-proposal_boxes = tf.constant(
+num_proposals = np.array([3, 2], dtype=np.int32)
+proposal_boxes = np.array(
[[[0, 0, 16, 16],
[0, 16, 16, 32],
[16, 0, 32, 16],
@@ -1198,13 +1257,13 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
[0, 0, 0, 0], # begin paddings
[0, 0, 0, 0],
[0, 0, 0, 0],
-[0, 0, 0, 0]]], dtype=tf.float32)
-refined_box_encodings = tf.zeros(
-(batch_size * model.max_num_proposals,
-model.num_classes,
-BOX_CODE_SIZE), dtype=tf.float32)
-class_predictions_with_background = tf.constant(
+[0, 0, 0, 0]]], dtype=np.float32)
+refined_box_encodings = np.zeros(
+(batch_size * second_stage_batch_size, 1
+if shared_boxes else num_classes, BOX_CODE_SIZE),
+dtype=np.float32)
+class_predictions_with_background = np.array(
[[-10, 10, -10], # first image
[10, -10, -10],
[10, -10, -10],
@@ -1216,7 +1275,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
[0, 0, 0], # begin paddings
[0, 0, 0],
[0, 0, 0],
-[0, 0, 0],], dtype=tf.float32)
+[0, 0, 0],], dtype=np.float32)
# The first groundtruth box is 4/5 of the anchor size in both directions
# experiencing a loss of:
@@ -1225,38 +1284,29 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
# The second groundtruth box is identical to the prediction and thus
# experiences zero loss.
# Total average loss is (abs(5 * log(4/5)) - .5) / 3.
-groundtruth_boxes_list = [
-tf.constant([[0.05, 0.05, 0.45, 0.45]], dtype=tf.float32),
-tf.constant([[0.0, 0.0, 0.5, 0.5]], dtype=tf.float32)]
-groundtruth_classes_list = [tf.constant([[1, 0]], dtype=tf.float32),
-tf.constant([[0, 1]], dtype=tf.float32)]
-exp_loc_loss = (-5 * np.log(.8) - 0.5) / 3.0
-prediction_dict = {
-'rpn_box_encodings': rpn_box_encodings,
-'rpn_objectness_predictions_with_background':
-rpn_objectness_predictions_with_background,
-'image_shape': image_shape,
-'anchors': anchors,
-'refined_box_encodings': refined_box_encodings,
-'class_predictions_with_background': class_predictions_with_background,
-'proposal_boxes': proposal_boxes,
-'num_proposals': num_proposals
-}
-_, true_image_shapes = model.preprocess(tf.zeros(image_shape))
-model.provide_groundtruth(groundtruth_boxes_list,
-groundtruth_classes_list)
-loss_dict = model.loss(prediction_dict, true_image_shapes)
-with self.test_session() as sess:
-loss_dict_out = sess.run(loss_dict)
-self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'],
-exp_loc_loss)
-self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0)
-self.assertAllClose(loss_dict_out[
-'Loss/BoxClassifierLoss/localization_loss'], exp_loc_loss)
-self.assertAllClose(loss_dict_out[
-'Loss/BoxClassifierLoss/classification_loss'], 0)
+groundtruth_boxes = np.stack([
+np.array([[0.05, 0.05, 0.45, 0.45]], dtype=np.float32),
+np.array([[0.0, 0.0, 0.5, 0.5]], dtype=np.float32)])
+groundtruth_classes = np.stack([np.array([[1, 0]], dtype=np.float32),
+np.array([[0, 1]], dtype=np.float32)])
+execute_fn = self.execute_cpu
+if use_static_shapes:
+execute_fn = self.execute
+results = execute_fn(graph_fn, [
+anchors, rpn_box_encodings, rpn_objectness_predictions_with_background,
+images, num_proposals, proposal_boxes, refined_box_encodings,
+class_predictions_with_background, groundtruth_boxes,
+groundtruth_classes
+])
+exp_loc_loss = (-5 * np.log(.8) - 0.5) / 3.0
+self.assertAllClose(results[0], exp_loc_loss, rtol=1e-4, atol=1e-4)
+self.assertAllClose(results[1], 0.0)
+self.assertAllClose(results[2], exp_loc_loss, rtol=1e-4, atol=1e-4)
+self.assertAllClose(results[3], 0.0)
def test_loss_with_hard_mining(self):
model = self._build_model(is_training=True,
@@ -1346,10 +1396,14 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
self.assertAllClose(loss_dict_out[
'Loss/BoxClassifierLoss/classification_loss'], 0)
-def test_loss_full_with_shared_boxes(self):
-model = self._build_model(
-is_training=True, number_of_stages=2, second_stage_batch_size=6)
+def test_loss_with_hard_mining_and_losses_mask(self):
+model = self._build_model(is_training=True,
+number_of_stages=2,
+second_stage_batch_size=None,
+first_stage_max_proposals=6,
+hard_mining=True)
batch_size = 2
+number_of_proposals = 3
anchors = tf.constant(
[[0, 0, 16, 16],
[0, 16, 16, 32],
@@ -1361,63 +1415,77 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
BOX_CODE_SIZE], dtype=tf.float32)
# use different numbers for the objectness category to break ties in
# order of boxes returned by NMS
-rpn_objectness_predictions_with_background = tf.constant([
-[[-10, 13],
-[10, -10],
+rpn_objectness_predictions_with_background = tf.constant(
+[[[-10, 13],
+[-10, 12],
[10, -11],
-[-10, 12]],
-[[10, -10],
-[-10, 13],
-[-10, 12],
-[10, -11]]], dtype=tf.float32)
+[10, -12]],
+[[-10, 13],
+[-10, 12],
+[10, -11],
+[10, -12]]], dtype=tf.float32)
image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
-num_proposals = tf.constant([6, 6], dtype=tf.int32)
+# box_classifier_batch_size is 6, but here we assume that the number of
+# actual proposals (not counting zero paddings) is fewer (3).
+num_proposals = tf.constant([number_of_proposals, number_of_proposals],
+dtype=tf.int32)
proposal_boxes = tf.constant(
-2 * [[[0, 0, 16, 16],
+[[[0, 0, 16, 16], # first image
[0, 16, 16, 32],
[16, 0, 32, 16],
-[16, 16, 32, 32],
-[0, 0, 16, 16],
-[0, 16, 16, 32]]], dtype=tf.float32)
+[0, 0, 0, 0], # begin paddings
+[0, 0, 0, 0],
+[0, 0, 0, 0]],
+[[0, 0, 16, 16], # second image
+[0, 16, 16, 32],
+[16, 0, 32, 16],
+[0, 0, 0, 0], # begin paddings
+[0, 0, 0, 0],
+[0, 0, 0, 0]]], dtype=tf.float32)
refined_box_encodings = tf.zeros(
(batch_size * model.max_num_proposals,
-1, # one box shared among all the classes
+model.num_classes,
BOX_CODE_SIZE), dtype=tf.float32)
class_predictions_with_background = tf.constant(
[[-10, 10, -10], # first image
-[10, -10, -10],
-[10, -10, -10],
[-10, -10, 10],
-[-10, 10, -10],
-[10, -10, -10],
-[10, -10, -10], # second image
-[-10, 10, -10],
-[-10, 10, -10],
[10, -10, -10],
+[0, 0, 0], # begin paddings
+[0, 0, 0],
+[0, 0, 0],
+[-10, 10, -10], # second image
+[-10, -10, 10],
[10, -10, -10],
-[-10, 10, -10]], dtype=tf.float32)
-mask_predictions_logits = 20 * tf.ones((batch_size *
-model.max_num_proposals,
-model.num_classes,
-14, 14),
-dtype=tf.float32)
+[0, 0, 0], # begin paddings
+[0, 0, 0],
+[0, 0, 0]], dtype=tf.float32)
+# The first groundtruth box is 4/5 of the anchor size in both directions
+# experiencing a loss of:
+# 2 * SmoothL1(5 * log(4/5)) / (num_proposals * batch_size)
+# = 2 * (abs(5 * log(4/5)) - .5) / 6
+# The second groundtruth box is 46/50 of the anchor size in both directions
+# experiencing a loss of:
+# 2 * SmoothL1(5 * log(46/50)) / (num_proposals * batch_size)
+# = 2 * (.5 * (5 * log(.92))^2) / 6.
+# Since the first groundtruth box experiences greater loss, and we have
+# set num_hard_examples=1 in the HardMiner, the final localization loss
+# corresponds to that of the first groundtruth box.
groundtruth_boxes_list = [
-tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
-tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
-groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
-tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
-# Set all elements of groundtruth mask to 1.0. In this case all proposal
-# crops of the groundtruth masks should return a mask that covers the entire
-# proposal. Thus, if mask_predictions_logits element values are all greater
-# than 20, the loss should be zero.
-groundtruth_masks_list = [tf.convert_to_tensor(np.ones((2, 32, 32)),
-dtype=tf.float32),
-tf.convert_to_tensor(np.ones((2, 32, 32)),
-dtype=tf.float32)]
+tf.constant([[0.05, 0.05, 0.45, 0.45],
+[0.02, 0.52, 0.48, 0.98]], dtype=tf.float32),
+tf.constant([[0.05, 0.05, 0.45, 0.45],
+[0.02, 0.52, 0.48, 0.98]], dtype=tf.float32)]
+groundtruth_classes_list = [
+tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
+tf.constant([[1, 0], [0, 1]], dtype=tf.float32)]
+is_annotated_list = [tf.constant(True, dtype=tf.bool),
+tf.constant(False, dtype=tf.bool)]
+exp_loc_loss = (2 * (-5 * np.log(.8) - 0.5) /
+(number_of_proposals * batch_size))
prediction_dict = {
'rpn_box_encodings': rpn_box_encodings,
'rpn_objectness_predictions_with_background':
@@ -1427,24 +1495,20 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'refined_box_encodings': refined_box_encodings,
'class_predictions_with_background': class_predictions_with_background,
'proposal_boxes': proposal_boxes,
-'num_proposals': num_proposals,
-'mask_predictions': mask_predictions_logits
+'num_proposals': num_proposals
}
_, true_image_shapes = model.preprocess(tf.zeros(image_shape))
model.provide_groundtruth(groundtruth_boxes_list,
groundtruth_classes_list,
-groundtruth_masks_list)
+is_annotated_list=is_annotated_list)
loss_dict = model.loss(prediction_dict, true_image_shapes)
with self.test_session() as sess:
loss_dict_out = sess.run(loss_dict)
-self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0)
-self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0)
self.assertAllClose(loss_dict_out[
-'Loss/BoxClassifierLoss/localization_loss'], 0)
+'Loss/BoxClassifierLoss/localization_loss'], exp_loc_loss)
self.assertAllClose(loss_dict_out[
'Loss/BoxClassifierLoss/classification_loss'], 0)
-self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0)
def test_restore_map_for_classification_ckpt(self):
# Define mock tensorflow classification graph and save variables.
......
@@ -62,11 +62,11 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_sampler,
-first_stage_nms_score_threshold,
-first_stage_nms_iou_threshold,
+first_stage_non_max_suppression_fn,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
+crop_and_resize_fn,
second_stage_target_assigner,
second_stage_rfcn_box_predictor,
second_stage_batch_size,
@@ -79,8 +79,9 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
hard_example_miner,
parallel_iterations=16,
add_summaries=True,
-use_matmul_crop_and_resize=False,
-clip_anchors_to_image=False):
+clip_anchors_to_image=False,
+use_static_shapes=False,
+resize_masks=False):
"""RFCNMetaArch Constructor.
Args:
@@ -123,18 +124,22 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
only called "batch_size" due to terminology from the Faster R-CNN paper.
first_stage_sampler: The sampler for the boxes used to calculate the RPN
loss after the first stage.
-first_stage_nms_score_threshold: Score threshold for non max suppression
-for the Region Proposal Network (RPN). This value is expected to be in
-[0, 1] as it is applied directly after a softmax transformation. The
-recommended value for Faster R-CNN is 0.
-first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold
-for performing Non-Max Suppression (NMS) on the boxes predicted by the
-Region Proposal Network (RPN).
+first_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
+callable that takes `boxes`, `scores` and optional `clip_window` (with
+all other inputs already set) and returns a dictionary containing
+tensors with keys: `detection_boxes`, `detection_scores`,
+`detection_classes`, `num_detections`. This is used to perform non max
+suppression on the boxes predicted by the Region Proposal Network
+(RPN).
+See `post_processing.batch_multiclass_non_max_suppression` for the type
+and shape of these tensors.
first_stage_max_proposals: Maximum number of boxes to retain after
performing Non-Max Suppression (NMS) on the boxes predicted by the
Region Proposal Network (RPN).
first_stage_localization_loss_weight: A float
first_stage_objectness_loss_weight: A float
+crop_and_resize_fn: A differentiable resampler to use for cropping RPN
+proposal features.
second_stage_target_assigner: Target assigner to use for second stage of
R-FCN. If the model is configured with multiple prediction heads, this
target assigner is used to generate targets for all heads (with the
@@ -168,12 +173,13 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
in parallel for calls to tf.map_fn.
add_summaries: boolean (default: True) controlling whether summary ops
should be added to tensorflow graph.
-use_matmul_crop_and_resize: Force the use of matrix multiplication based
-crop and resize instead of standard tf.image.crop_and_resize while
-computing second stage input feature maps.
clip_anchors_to_image: The anchors generated are clipped to the
window size without filtering the nonoverlapping anchors. This generates
a static number of anchors. This argument is unused.
+use_static_shapes: If True, uses implementation of ops with static shape
+guarantees.
+resize_masks: Indicates whether the masks present in the groundtruth
+should be resized in the model with `image_resizer_fn`.
Raises:
ValueError: If `second_stage_batch_size` > `first_stage_max_proposals`
@@ -196,11 +202,11 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_sampler,
-first_stage_nms_score_threshold,
-first_stage_nms_iou_threshold,
+first_stage_non_max_suppression_fn,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
+crop_and_resize_fn,
None, # initial_crop_size is not used in R-FCN
None, # maxpool_kernel_size is not used in R-FCN
None, # maxpool_stride is not used in R-FCN
@@ -215,7 +221,11 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
second_stage_classification_loss,
1.0, # second stage mask prediction loss weight isn't used in R-FCN.
hard_example_miner,
-parallel_iterations)
+parallel_iterations,
+add_summaries,
+clip_anchors_to_image,
+use_static_shapes,
+resize_masks)
self._rfcn_box_predictor = second_stage_rfcn_box_predictor
......
@@ -125,12 +125,13 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
depth_multiplier,
min_depth,
pad_to_multiple,
-conv_hyperparams_config,
+conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
use_explicit_padding=False,
use_depthwise=False,
-override_base_feature_extractor_hyperparams=False):
+override_base_feature_extractor_hyperparams=False,
+name=None):
"""Constructor.
Args:
@@ -139,9 +140,9 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
-conv_hyperparams_config: A hyperparams.proto object containing
-convolution hyperparameters for the layers added on top of the
-base feature extractor.
+conv_hyperparams: `hyperparams_builder.KerasLayerHyperparams` object
+containing convolution hyperparameters for the layers added on top of
+the base feature extractor.
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
@@ -156,14 +157,16 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_config`.
+name: A string name scope to assign to the model. If 'None', Keras
+will auto-generate one from the class name.
"""
-super(SSDKerasFeatureExtractor, self).__init__()
+super(SSDKerasFeatureExtractor, self).__init__(name=name)
self._is_training = is_training
self._depth_multiplier = depth_multiplier
self._min_depth = min_depth
self._pad_to_multiple = pad_to_multiple
-self._conv_hyperparams_config = conv_hyperparams_config
+self._conv_hyperparams = conv_hyperparams
self._freeze_batchnorm = freeze_batchnorm
self._inplace_batchnorm_update = inplace_batchnorm_update
self._use_explicit_padding = use_explicit_padding
@@ -225,10 +228,7 @@ class SSDMetaArch(model.DetectionModel):
box_predictor,
box_coder,
feature_extractor,
-matcher,
-region_similarity_calculator,
encode_background_as_zeros,
-negative_class_weight,
image_resizer_fn,
non_max_suppression_fn,
score_conversion_fn,
@@ -238,14 +238,14 @@ class SSDMetaArch(model.DetectionModel):
localization_loss_weight,
normalize_loss_by_num_matches,
hard_example_miner,
+target_assigner_instance,
add_summaries=True,
normalize_loc_loss_by_codesize=False,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
add_background_class=True,
random_example_sampler=None,
-expected_classification_loss_under_sampling=None,
-target_assigner_instance=None):
+expected_classification_loss_under_sampling=None):
"""SSDMetaArch Constructor.
TODO(rathodv,jonathanhuang): group NMS parameters + score converter into
@@ -259,13 +259,9 @@ class SSDMetaArch(model.DetectionModel):
box_predictor: a box_predictor.BoxPredictor object.
box_coder: a box_coder.BoxCoder object.
feature_extractor: a SSDFeatureExtractor object.
-matcher: a matcher.Matcher object.
-region_similarity_calculator: a
-region_similarity_calculator.RegionSimilarityCalculator object.
encode_background_as_zeros: boolean determining whether background
targets are to be encoded as an all zeros vector or a one-hot
vector (where background is the 0th class).
-negative_class_weight: Weight for confidence loss of negative anchors.
image_resizer_fn: a callable for image resizing. This callable always
takes a rank-3 image tensor (corresponding to a single image) and
returns a rank-3 image tensor, possibly with new spatial dimensions and
@@ -288,6 +284,7 @@ class SSDMetaArch(model.DetectionModel):
localization_loss_weight: float
normalize_loss_by_num_matches: boolean
hard_example_miner: a losses.HardExampleMiner object (can be None)
+target_assigner_instance: target_assigner.TargetAssigner instance to use.
add_summaries: boolean (default: True) controlling whether summary ops
should be added to tensorflow graph.
normalize_loc_loss_by_codesize: whether to normalize localization loss
@@ -312,7 +309,6 @@ class SSDMetaArch(model.DetectionModel):
the random sampled examples.
expected_classification_loss_under_sampling: If not None, use
to calculate classification loss by background/foreground weighting.
-target_assigner_instance: target_assigner.TargetAssigner instance to use.
"""
super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes)
self._is_training = is_training
@@ -324,8 +320,6 @@ class SSDMetaArch(model.DetectionModel):
self._box_coder = box_coder
self._feature_extractor = feature_extractor
-self._matcher = matcher
-self._region_similarity_calculator = region_similarity_calculator
self._add_background_class = add_background_class
# Needed for fine-tuning from classification checkpoints whose
@@ -347,14 +341,7 @@ class SSDMetaArch(model.DetectionModel):
self._unmatched_class_label = tf.constant((self.num_classes + 1) * [0],
tf.float32)
-if target_assigner_instance:
-self._target_assigner = target_assigner_instance
-else:
-self._target_assigner = target_assigner.TargetAssigner(
-self._region_similarity_calculator,
-self._matcher,
-self._box_coder,
-negative_class_weight=negative_class_weight)
+self._target_assigner = target_assigner_instance
self._classification_loss = classification_loss
self._localization_loss = localization_loss
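Note (not part of this commit): callers that previously handed matcher, region_similarity_calculator and negative_class_weight to SSDMetaArch now have to build the target assigner themselves and pass it in, mirroring the fallback construction removed above. A minimal sketch, assuming the core, matchers and box_coders modules at this revision; the argmax matcher thresholds are illustrative:

from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import target_assigner
from object_detection.matchers import argmax_matcher

matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                       unmatched_threshold=0.5)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
target_assigner_instance = target_assigner.TargetAssigner(
    sim_calc.IouSimilarity(), matcher, box_coder,
    negative_class_weight=1.0)
# target_assigner_instance is then passed to the SSDMetaArch constructor.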
@@ -523,28 +510,25 @@ class SSDMetaArch(model.DetectionModel):
im_height=image_shape[1],
im_width=image_shape[2]))
if self._box_predictor.is_keras_model:
-prediction_dict = self._box_predictor(feature_maps)
+predictor_results_dict = self._box_predictor(feature_maps)
else:
with slim.arg_scope([slim.batch_norm],
is_training=(self._is_training and
not self._freeze_batchnorm),
updates_collections=batchnorm_updates_collections):
-prediction_dict = self._box_predictor.predict(
+predictor_results_dict = self._box_predictor.predict(
feature_maps, self._anchor_generator.num_anchors_per_location())
-box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
-if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
-box_encodings = tf.squeeze(box_encodings, axis=2)
-class_predictions_with_background = tf.concat(
-prediction_dict['class_predictions_with_background'], axis=1)
predictions_dict = {
'preprocessed_inputs': preprocessed_inputs,
-'box_encodings': box_encodings,
-'class_predictions_with_background':
-class_predictions_with_background,
'feature_maps': feature_maps,
'anchors': self._anchors.get()
}
+for prediction_key, prediction_list in iter(predictor_results_dict.items()):
+prediction = tf.concat(prediction_list, axis=1)
+if (prediction_key == 'box_encodings' and prediction.shape.ndims == 4 and
+prediction.shape[2] == 1):
+prediction = tf.squeeze(prediction, axis=2)
+predictions_dict[prediction_key] = prediction
self._batched_prediction_tensor_names = [x for x in predictions_dict
if x != 'anchors']
return predictions_dict
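Note (not part of this commit): the loop above replaces the hard-coded handling of box_encodings and class_predictions_with_background with a generic concatenation over every per-feature-map list the box predictor returns, which is what lets extra heads such as mask_predictions flow through unchanged. A toy, self-contained illustration of that reshaping; the dictionary below stands in for a box predictor's output:

import tensorflow as tf

# Two feature maps with 4 and 2 anchors respectively; box encodings carry an
# extra singleton class dimension that gets squeezed away.
predictor_results_dict = {
    'box_encodings': [tf.zeros([8, 4, 1, 4]), tf.zeros([8, 2, 1, 4])],
    'class_predictions_with_background': [tf.zeros([8, 4, 3]),
                                          tf.zeros([8, 2, 3])],
}
predictions_dict = {}
for prediction_key, prediction_list in predictor_results_dict.items():
  prediction = tf.concat(prediction_list, axis=1)
  if (prediction_key == 'box_encodings' and prediction.shape.ndims == 4 and
      prediction.shape[2] == 1):
    prediction = tf.squeeze(prediction, axis=2)
  predictions_dict[prediction_key] = prediction
# predictions_dict['box_encodings'] now has shape [8, 6, 4] and
# predictions_dict['class_predictions_with_background'] has shape [8, 6, 3].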
@@ -587,6 +571,10 @@ class SSDMetaArch(model.DetectionModel):
[batch_size, num_anchors, num_classes+1] containing class predictions
(logits) for each of the anchors. Note that this tensor *includes*
background class predictions.
+4) mask_predictions: (optional) a 5-D float tensor of shape
+[batch_size, num_anchors, q, mask_height, mask_width]. `q` can be
+either number of classes or 1 depending on whether a separate mask is
+predicted per class.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
@@ -599,6 +587,8 @@ class SSDMetaArch(model.DetectionModel):
detection_classes: [batch, max_detections]
detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
encoded in the prediction_dict 'box_encodings')
+detection_masks: [batch_size, max_detections, mask_height, mask_width]
+(optional)
num_detections: [batch]
Raises:
ValueError: if prediction_dict does not contain `box_encodings` or
@@ -627,13 +617,14 @@ class SSDMetaArch(model.DetectionModel):
if detection_keypoints is not None:
additional_fields = {
fields.BoxListFields.keypoints: detection_keypoints}
-(nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields,
-num_detections) = self._non_max_suppression_fn(
+(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+nmsed_additional_fields, num_detections) = self._non_max_suppression_fn(
detection_boxes,
detection_scores,
-clip_window=self._compute_clip_window(
-preprocessed_images, true_image_shapes),
-additional_fields=additional_fields)
+clip_window=self._compute_clip_window(preprocessed_images,
+true_image_shapes),
+additional_fields=additional_fields,
+masks=prediction_dict.get('mask_predictions'))
detection_dict = {
fields.DetectionResultFields.detection_boxes: nmsed_boxes,
fields.DetectionResultFields.detection_scores: nmsed_scores,
@@ -645,6 +636,9 @@ class SSDMetaArch(model.DetectionModel):
fields.BoxListFields.keypoints in nmsed_additional_fields):
detection_dict[fields.DetectionResultFields.detection_keypoints] = (
nmsed_additional_fields[fields.BoxListFields.keypoints])
+if nmsed_masks is not None:
+detection_dict[
+fields.DetectionResultFields.detection_masks] = nmsed_masks
return detection_dict
def loss(self, prediction_dict, true_image_shapes, scope=None):
@@ -701,16 +695,22 @@ class SSDMetaArch(model.DetectionModel):
batch_cls_weights = tf.multiply(batch_sampled_indicator,
batch_cls_weights)
+losses_mask = None
+if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
+losses_mask = tf.stack(self.groundtruth_lists(
+fields.InputDataFields.is_annotated))
location_losses = self._localization_loss(
prediction_dict['box_encodings'],
batch_reg_targets,
ignore_nan_targets=True,
-weights=batch_reg_weights)
+weights=batch_reg_weights,
+losses_mask=losses_mask)
cls_losses = self._classification_loss(
prediction_dict['class_predictions_with_background'],
batch_cls_targets,
-weights=batch_cls_weights)
+weights=batch_cls_weights,
+losses_mask=losses_mask)
if self._expected_classification_loss_under_sampling:
if cls_losses.get_shape().ndims == 3:
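Note (not part of this commit): the losses_mask above is driven by a per-image is_annotated flag supplied as groundtruth, so images marked as not annotated contribute nothing to the localization and classification losses. A minimal sketch of providing that flag, assuming an already-built DetectionModel bound to the name `model` and the standard_fields module at this revision:

import tensorflow as tf
from object_detection.core import standard_fields as fields

groundtruth_boxes_list = [
    tf.constant([[0., 0., .5, .5]], dtype=tf.float32),  # annotated image
    tf.zeros([0, 4], dtype=tf.float32)]                 # unannotated image
groundtruth_classes_list = [
    tf.constant([[1., 0.]], dtype=tf.float32),
    tf.zeros([0, 2], dtype=tf.float32)]
is_annotated_list = [tf.constant(True), tf.constant(False)]
model.provide_groundtruth(groundtruth_boxes_list,
                          groundtruth_classes_list,
                          is_annotated_list=is_annotated_list)
# model.loss(...) then masks out the per-image losses of the second image.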
@@ -734,12 +734,6 @@ class SSDMetaArch(model.DetectionModel):
self._hard_example_miner.summarize()
else:
cls_losses = ops.reduce_sum_trailing_dimensions(cls_losses, ndims=2)
-if self._add_summaries:
-class_ids = tf.argmax(batch_cls_targets, axis=2)
-flattened_class_ids = tf.reshape(class_ids, [-1])
-flattened_classification_losses = tf.reshape(cls_losses, [-1])
-self._summarize_anchor_classification_loss(
-flattened_class_ids, flattened_classification_losses)
localization_loss = tf.reduce_sum(location_losses)
classification_loss = tf.reduce_sum(cls_losses)
......
@@ -14,105 +14,26 @@
# ==============================================================================
"""Tests for object_detection.meta_architectures.ssd_meta_arch."""
-import functools
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
-from object_detection.core import anchor_generator
-from object_detection.core import balanced_positive_negative_sampler as sampler
-from object_detection.core import box_list
-from object_detection.core import losses
-from object_detection.core import post_processing
-from object_detection.core import region_similarity_calculator as sim_calc
-from object_detection.core import target_assigner
from object_detection.meta_architectures import ssd_meta_arch
-from object_detection.utils import ops
-from object_detection.utils import test_case
+from object_detection.meta_architectures import ssd_meta_arch_test_lib
from object_detection.utils import test_utils
slim = tf.contrib.slim
keras = tf.keras.layers
-class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
-def __init__(self):
-super(FakeSSDFeatureExtractor, self).__init__(
-is_training=True,
-depth_multiplier=0,
-min_depth=0,
-pad_to_multiple=1,
-conv_hyperparams_fn=None)
-def preprocess(self, resized_inputs):
-return tf.identity(resized_inputs)
-def extract_features(self, preprocessed_inputs):
-with tf.variable_scope('mock_model'):
-features = slim.conv2d(inputs=preprocessed_inputs, num_outputs=32,
-kernel_size=1, scope='layer1')
-return [features]
-class FakeSSDKerasFeatureExtractor(ssd_meta_arch.SSDKerasFeatureExtractor):
-def __init__(self):
-with tf.name_scope('mock_model'):
-super(FakeSSDKerasFeatureExtractor, self).__init__(
-is_training=True,
-depth_multiplier=0,
-min_depth=0,
-pad_to_multiple=1,
-conv_hyperparams_config=None,
-freeze_batchnorm=False,
-inplace_batchnorm_update=False,
-)
-self._conv = keras.Conv2D(filters=32, kernel_size=1, name='layer1')
-def preprocess(self, resized_inputs):
-return tf.identity(resized_inputs)
-def _extract_features(self, preprocessed_inputs, **kwargs):
-with tf.name_scope('mock_model'):
-return [self._conv(preprocessed_inputs)]
-class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
-"""Sets up a simple 2x2 anchor grid on the unit square."""
-def name_scope(self):
-return 'MockAnchorGenerator'
-def num_anchors_per_location(self):
-return [1]
-def _generate(self, feature_map_shape_list, im_height, im_width):
-return [box_list.BoxList(
-tf.constant([[0, 0, .5, .5],
-[0, .5, .5, 1],
-[.5, 0, 1, .5],
-[1., 1., 1.5, 1.5] # Anchor that is outside clip_window.
-], tf.float32))]
-def num_anchors(self):
-return 4
-def _get_value_for_matching_key(dictionary, suffix):
-for key in dictionary.keys():
-if key.endswith(suffix):
-return dictionary[key]
-raise ValueError('key not found {}'.format(suffix))
@parameterized.parameters(
{'use_keras': False},
{'use_keras': True},
)
-class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
+class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
+parameterized.TestCase):
def _create_model(self,
apply_hard_mining=True,
...@@ -123,96 +44,25 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase): ...@@ -123,96 +44,25 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
use_expected_classification_loss_under_sampling=False, use_expected_classification_loss_under_sampling=False,
minimum_negative_sampling=1, minimum_negative_sampling=1,
desired_negative_sampling_ratio=3, desired_negative_sampling_ratio=3,
use_keras=False): use_keras=False,
is_training = False predict_mask=False,
num_classes = 1 use_static_shapes=False,
mock_anchor_generator = MockAnchorGenerator2x2() nms_max_size_per_class=5):
if use_keras: return super(SsdMetaArchTest, self)._create_model(
mock_box_predictor = test_utils.MockKerasBoxPredictor( model_fn=ssd_meta_arch.SSDMetaArch,
is_training, num_classes) apply_hard_mining=apply_hard_mining,
else:
mock_box_predictor = test_utils.MockBoxPredictor(
is_training, num_classes)
mock_box_coder = test_utils.MockBoxCoder()
if use_keras:
fake_feature_extractor = FakeSSDKerasFeatureExtractor()
else:
fake_feature_extractor = FakeSSDFeatureExtractor()
mock_matcher = test_utils.MockMatcher()
region_similarity_calculator = sim_calc.IouSimilarity()
encode_background_as_zeros = False
def image_resizer_fn(image):
return [tf.identity(image), tf.shape(image)]
classification_loss = losses.WeightedSigmoidClassificationLoss()
localization_loss = losses.WeightedSmoothL1LocalizationLoss()
non_max_suppression_fn = functools.partial(
post_processing.batch_multiclass_non_max_suppression,
score_thresh=-20.0,
iou_thresh=1.0,
max_size_per_class=5,
max_total_size=5)
classification_loss_weight = 1.0
localization_loss_weight = 1.0
negative_class_weight = 1.0
normalize_loss_by_num_matches = False
hard_example_miner = None
if apply_hard_mining:
# This hard example miner is expected to be a no-op.
hard_example_miner = losses.HardExampleMiner(
num_hard_examples=None,
iou_threshold=1.0)
random_example_sampler = None
if random_example_sampling:
random_example_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=0.5)
target_assigner_instance = target_assigner.TargetAssigner(
region_similarity_calculator,
mock_matcher,
mock_box_coder,
negative_class_weight=negative_class_weight,
weight_regression_loss_by_score=weight_regression_loss_by_score)
expected_classification_loss_under_sampling = None
if use_expected_classification_loss_under_sampling:
expected_classification_loss_under_sampling = functools.partial(
ops.expected_classification_loss_under_sampling,
minimum_negative_sampling=minimum_negative_sampling,
desired_negative_sampling_ratio=desired_negative_sampling_ratio)
code_size = 4
model = ssd_meta_arch.SSDMetaArch(
is_training,
mock_anchor_generator,
mock_box_predictor,
mock_box_coder,
fake_feature_extractor,
mock_matcher,
region_similarity_calculator,
encode_background_as_zeros,
negative_class_weight,
image_resizer_fn,
non_max_suppression_fn,
tf.identity,
classification_loss,
localization_loss,
classification_loss_weight,
localization_loss_weight,
normalize_loss_by_num_matches,
hard_example_miner,
target_assigner_instance=target_assigner_instance,
add_summaries=False,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize, normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
add_background_class=add_background_class, add_background_class=add_background_class,
random_example_sampler=random_example_sampler, random_example_sampling=random_example_sampling,
expected_classification_loss_under_sampling= weight_regression_loss_by_score=weight_regression_loss_by_score,
expected_classification_loss_under_sampling) use_expected_classification_loss_under_sampling=
return model, num_classes, mock_anchor_generator.num_anchors(), code_size use_expected_classification_loss_under_sampling,
minimum_negative_sampling=minimum_negative_sampling,
desired_negative_sampling_ratio=desired_negative_sampling_ratio,
use_keras=use_keras,
predict_mask=predict_mask,
use_static_shapes=use_static_shapes,
nms_max_size_per_class=nms_max_size_per_class)
def test_preprocess_preserves_shapes_with_dynamic_input_image( def test_preprocess_preserves_shapes_with_dynamic_input_image(
self, use_keras): self, use_keras):
...@@ -360,6 +210,7 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase): ...@@ -360,6 +210,7 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertAllClose(detections_out['num_detections'], self.assertAllClose(detections_out['num_detections'],
expected_num_detections) expected_num_detections)
def test_loss_results_are_correct(self, use_keras): def test_loss_results_are_correct(self, use_keras):
with tf.Graph().as_default(): with tf.Graph().as_default():
...@@ -374,9 +225,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase): ...@@ -374,9 +225,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
prediction_dict = model.predict(preprocessed_tensor, prediction_dict = model.predict(preprocessed_tensor,
true_image_shapes=None) true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None) loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return ( return (self._get_value_for_matching_key(loss_dict,
_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'), 'Loss/localization_loss'),
_get_value_for_matching_key(loss_dict, 'Loss/classification_loss')) self._get_value_for_matching_key(loss_dict,
'Loss/classification_loss'))
batch_size = 2 batch_size = 2
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32) preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
...@@ -413,7 +265,8 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase): ...@@ -413,7 +265,8 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
prediction_dict = model.predict(preprocessed_tensor, prediction_dict = model.predict(preprocessed_tensor,
true_image_shapes=None) true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None) loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return (_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'),) return (self._get_value_for_matching_key(loss_dict,
'Loss/localization_loss'),)
batch_size = 2 batch_size = 2
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32) preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
...@@ -443,9 +296,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase): ...@@ -443,9 +296,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
prediction_dict = model.predict(preprocessed_tensor, prediction_dict = model.predict(preprocessed_tensor,
true_image_shapes=None) true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None) loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return ( return (self._get_value_for_matching_key(loss_dict,
_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'), 'Loss/localization_loss'),
_get_value_for_matching_key(loss_dict, 'Loss/classification_loss')) self._get_value_for_matching_key(loss_dict,
'Loss/classification_loss'))
batch_size = 2 batch_size = 2
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32) preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
...@@ -591,6 +445,55 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase): ...@@ -591,6 +445,55 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertAllClose(localization_loss, expected_localization_loss) self.assertAllClose(localization_loss, expected_localization_loss)
self.assertAllClose(classification_loss, expected_classification_loss) self.assertAllClose(classification_loss, expected_classification_loss)
def test_loss_results_are_correct_with_losses_mask(self, use_keras):
with tf.Graph().as_default():
_, num_classes, num_anchors, _ = self._create_model(use_keras=use_keras)
def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2,
groundtruth_boxes3, groundtruth_classes1, groundtruth_classes2,
groundtruth_classes3):
groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2,
groundtruth_boxes3]
groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2,
groundtruth_classes3]
is_annotated_list = [tf.constant(True), tf.constant(True),
tf.constant(False)]
model, _, _, _ = self._create_model(apply_hard_mining=False)
model.provide_groundtruth(groundtruth_boxes_list,
groundtruth_classes_list,
is_annotated_list=is_annotated_list)
prediction_dict = model.predict(preprocessed_tensor,
true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return (self._get_value_for_matching_key(loss_dict,
'Loss/localization_loss'),
self._get_value_for_matching_key(loss_dict,
'Loss/classification_loss'))
batch_size = 3
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_boxes3 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_classes1 = np.array([[1]], dtype=np.float32)
groundtruth_classes2 = np.array([[1]], dtype=np.float32)
groundtruth_classes3 = np.array([[1]], dtype=np.float32)
expected_localization_loss = 0.0
# Note that we are subtracting 1 from batch_size, since the final image is
# not annotated.
expected_classification_loss = ((batch_size - 1) * num_anchors
* (num_classes+1) * np.log(2.0))
(localization_loss,
classification_loss) = self.execute(graph_fn, [preprocessed_input,
groundtruth_boxes1,
groundtruth_boxes2,
groundtruth_boxes3,
groundtruth_classes1,
groundtruth_classes2,
groundtruth_classes3])
self.assertAllClose(localization_loss, expected_localization_loss)
self.assertAllClose(classification_loss, expected_classification_loss)
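For the mock setup used here (MockAnchorGenerator2x2 yields 4 anchors, num_classes is 1, and the third image is excluded via is_annotated_list), the expected value works out as below; the per-anchor, per-class contribution of log(2) is what a sigmoid loss gives at zero logits, which is assumed to match the mock predictor:

import numpy as np

batch_size, num_anchors, num_classes = 3, 4, 1
expected = (batch_size - 1) * num_anchors * (num_classes + 1) * np.log(2.0)
# 2 * 4 * 2 * log(2) ~= 11.09; the unannotated image contributes nothing.
print(expected)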
def test_restore_map_for_detection_ckpt(self, use_keras): def test_restore_map_for_detection_ckpt(self, use_keras):
model, _, _, _ = self._create_model(use_keras=use_keras) model, _, _, _ = self._create_model(use_keras=use_keras)
model.predict(tf.constant(np.array([[[[0, 0], [1, 1]], [[1, 0], [0, 1]]]], model.predict(tf.constant(np.array([[[[0, 0], [1, 1]], [[1, 0], [0, 1]]]],
...@@ -678,10 +581,8 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase): ...@@ -678,10 +581,8 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
use_keras): use_keras):
with tf.Graph().as_default(): with tf.Graph().as_default():
_, num_classes, num_anchors, _ = self._create_model( _, num_classes, _, _ = self._create_model(
random_example_sampling=True, random_example_sampling=True, use_keras=use_keras)
use_keras=use_keras)
print num_classes, num_anchors
def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2, def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2,
groundtruth_classes1, groundtruth_classes2): groundtruth_classes1, groundtruth_classes2):
...@@ -694,9 +595,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase): ...@@ -694,9 +595,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
prediction_dict = model.predict( prediction_dict = model.predict(
preprocessed_tensor, true_image_shapes=None) preprocessed_tensor, true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None) loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return (_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'), return (self._get_value_for_matching_key(loss_dict,
_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'),
'Loss/classification_loss')) self._get_value_for_matching_key(loss_dict,
'Loss/classification_loss'))
batch_size = 2 batch_size = 2
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32) preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
......
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions for SSD models meta architecture tests."""
import functools
import tensorflow as tf
from object_detection.core import anchor_generator
from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import box_list
from object_detection.core import losses
from object_detection.core import post_processing
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import target_assigner
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.utils import ops
from object_detection.utils import test_case
from object_detection.utils import test_utils
slim = tf.contrib.slim
keras = tf.keras.layers
class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""Fake ssd feature extracture for ssd meta arch tests."""
def __init__(self):
super(FakeSSDFeatureExtractor, self).__init__(
is_training=True,
depth_multiplier=0,
min_depth=0,
pad_to_multiple=1,
conv_hyperparams_fn=None)
def preprocess(self, resized_inputs):
return tf.identity(resized_inputs)
def extract_features(self, preprocessed_inputs):
with tf.variable_scope('mock_model'):
features = slim.conv2d(
inputs=preprocessed_inputs,
num_outputs=32,
kernel_size=1,
scope='layer1')
return [features]
class FakeSSDKerasFeatureExtractor(ssd_meta_arch.SSDKerasFeatureExtractor):
"""Fake keras based ssd feature extracture for ssd meta arch tests."""
def __init__(self):
with tf.name_scope('mock_model'):
super(FakeSSDKerasFeatureExtractor, self).__init__(
is_training=True,
depth_multiplier=0,
min_depth=0,
pad_to_multiple=1,
conv_hyperparams=None,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
)
self._conv = keras.Conv2D(filters=32, kernel_size=1, name='layer1')
def preprocess(self, resized_inputs):
return tf.identity(resized_inputs)
def _extract_features(self, preprocessed_inputs, **kwargs):
with tf.name_scope('mock_model'):
return [self._conv(preprocessed_inputs)]
class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
"""A simple 2x2 anchor grid on the unit square used for test only."""
def name_scope(self):
return 'MockAnchorGenerator'
def num_anchors_per_location(self):
return [1]
def _generate(self, feature_map_shape_list, im_height, im_width):
return [
box_list.BoxList(
tf.constant(
[
[0, 0, .5, .5],
[0, .5, .5, 1],
[.5, 0, 1, .5],
[1., 1., 1.5, 1.5] # Anchor that is outside clip_window.
],
tf.float32))
]
def num_anchors(self):
return 4
class SSDMetaArchTestBase(test_case.TestCase):
"""Base class to test SSD based meta architectures."""
def _create_model(self,
model_fn=ssd_meta_arch.SSDMetaArch,
apply_hard_mining=True,
normalize_loc_loss_by_codesize=False,
add_background_class=True,
random_example_sampling=False,
weight_regression_loss_by_score=False,
use_expected_classification_loss_under_sampling=False,
minimum_negative_sampling=1,
desired_negative_sampling_ratio=3,
use_keras=False,
predict_mask=False,
use_static_shapes=False,
nms_max_size_per_class=5):
is_training = False
num_classes = 1
mock_anchor_generator = MockAnchorGenerator2x2()
if use_keras:
mock_box_predictor = test_utils.MockKerasBoxPredictor(
is_training, num_classes, predict_mask=predict_mask)
else:
mock_box_predictor = test_utils.MockBoxPredictor(
is_training, num_classes, predict_mask=predict_mask)
mock_box_coder = test_utils.MockBoxCoder()
if use_keras:
fake_feature_extractor = FakeSSDKerasFeatureExtractor()
else:
fake_feature_extractor = FakeSSDFeatureExtractor()
mock_matcher = test_utils.MockMatcher()
region_similarity_calculator = sim_calc.IouSimilarity()
encode_background_as_zeros = False
def image_resizer_fn(image):
return [tf.identity(image), tf.shape(image)]
classification_loss = losses.WeightedSigmoidClassificationLoss()
localization_loss = losses.WeightedSmoothL1LocalizationLoss()
non_max_suppression_fn = functools.partial(
post_processing.batch_multiclass_non_max_suppression,
score_thresh=-20.0,
iou_thresh=1.0,
max_size_per_class=nms_max_size_per_class,
max_total_size=nms_max_size_per_class,
use_static_shapes=use_static_shapes)
classification_loss_weight = 1.0
localization_loss_weight = 1.0
negative_class_weight = 1.0
normalize_loss_by_num_matches = False
hard_example_miner = None
if apply_hard_mining:
# This hard example miner is expected to be a no-op.
hard_example_miner = losses.HardExampleMiner(
num_hard_examples=None, iou_threshold=1.0)
random_example_sampler = None
if random_example_sampling:
random_example_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=0.5)
target_assigner_instance = target_assigner.TargetAssigner(
region_similarity_calculator,
mock_matcher,
mock_box_coder,
negative_class_weight=negative_class_weight,
weight_regression_loss_by_score=weight_regression_loss_by_score)
expected_classification_loss_under_sampling = None
if use_expected_classification_loss_under_sampling:
expected_classification_loss_under_sampling = functools.partial(
ops.expected_classification_loss_under_sampling,
minimum_negative_sampling=minimum_negative_sampling,
desired_negative_sampling_ratio=desired_negative_sampling_ratio)
code_size = 4
model = model_fn(
is_training=is_training,
anchor_generator=mock_anchor_generator,
box_predictor=mock_box_predictor,
box_coder=mock_box_coder,
feature_extractor=fake_feature_extractor,
encode_background_as_zeros=encode_background_as_zeros,
image_resizer_fn=image_resizer_fn,
non_max_suppression_fn=non_max_suppression_fn,
score_conversion_fn=tf.identity,
classification_loss=classification_loss,
localization_loss=localization_loss,
classification_loss_weight=classification_loss_weight,
localization_loss_weight=localization_loss_weight,
normalize_loss_by_num_matches=normalize_loss_by_num_matches,
hard_example_miner=hard_example_miner,
target_assigner_instance=target_assigner_instance,
add_summaries=False,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
add_background_class=add_background_class,
random_example_sampler=random_example_sampler,
expected_classification_loss_under_sampling=
expected_classification_loss_under_sampling)
return model, num_classes, mock_anchor_generator.num_anchors(), code_size
def _get_value_for_matching_key(self, dictionary, suffix):
for key in dictionary.keys():
if key.endswith(suffix):
return dictionary[key]
raise ValueError('key not found {}'.format(suffix))
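A hypothetical sketch of how a concrete test subclasses this base, mirroring SsdMetaArchTest above; the class and test names are illustrative only:

class _ExampleSSDMetaArchTest(SSDMetaArchTestBase):

  def test_mock_model_shapes(self):
    _, num_classes, num_anchors, code_size = self._create_model(use_keras=False)
    self.assertEqual(num_classes, 1)
    self.assertEqual(num_anchors, 4)  # MockAnchorGenerator2x2 defines 4 anchors.
    self.assertEqual(code_size, 4)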
if __name__ == '__main__':
tf.test.main()
...@@ -18,6 +18,7 @@ import tensorflow as tf ...@@ -18,6 +18,7 @@ import tensorflow as tf
from object_detection.core import standard_fields from object_detection.core import standard_fields
from object_detection.metrics import coco_tools from object_detection.metrics import coco_tools
from object_detection.utils import json_utils
from object_detection.utils import object_detection_evaluation from object_detection.utils import object_detection_evaluation
...@@ -148,6 +149,19 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -148,6 +149,19 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_classes])) detection_classes]))
self._image_ids[image_id] = True self._image_ids[image_id] = True
def dump_detections_to_json_file(self, json_output_path):
"""Saves the detections into json_output_path in the format used by MS COCO.
Args:
json_output_path: String containing the output file's path. It can also be
None, in which case nothing will be written to the output file.
"""
if json_output_path:
with tf.gfile.GFile(json_output_path, 'w') as fid:
tf.logging.info('Dumping detections to output json file.')
json_utils.Dump(
obj=self._detection_boxes_list, fid=fid, float_digits=4, indent=2)
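A hypothetical usage sketch of the new dump hook; the output path is a placeholder, and the evaluator is assumed to have already accumulated groundtruth and detections through the add_single_*_image_info calls:

from object_detection.metrics import coco_evaluation

categories = [{'id': 1, 'name': 'person'}, {'id': 2, 'name': 'dog'}, {'id': 3, 'name': 'cat'}]
evaluator = coco_evaluation.CocoDetectionEvaluator(categories)
# ... add_single_ground_truth_image_info / add_single_detected_image_info calls ...
evaluator.dump_detections_to_json_file('/tmp/detections.json')  # no-op when the path is None or empty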
def evaluate(self): def evaluate(self):
"""Evaluates the detection boxes and returns a dictionary of coco metrics. """Evaluates the detection boxes and returns a dictionary of coco metrics.
...@@ -245,10 +259,11 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -245,10 +259,11 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_boxes_batched, detection_scores_batched, detection_boxes_batched, detection_scores_batched,
detection_classes_batched, num_det_boxes_per_image): detection_classes_batched, num_det_boxes_per_image):
self.add_single_ground_truth_image_info( self.add_single_ground_truth_image_info(
image_id, image_id, {
{'groundtruth_boxes': gt_box[:num_gt_box], 'groundtruth_boxes': gt_box[:num_gt_box],
'groundtruth_classes': gt_class[:num_gt_box], 'groundtruth_classes': gt_class[:num_gt_box],
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]}) 'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
})
self.add_single_detected_image_info( self.add_single_detected_image_info(
image_id, image_id,
{'detection_boxes': det_box[:num_det_box], {'detection_boxes': det_box[:num_det_box],
...@@ -268,8 +283,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -268,8 +283,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_classes = eval_dict[detection_fields.detection_classes] detection_classes = eval_dict[detection_fields.detection_classes]
num_gt_boxes_per_image = eval_dict.get( num_gt_boxes_per_image = eval_dict.get(
'num_groundtruth_boxes_per_image', None) 'num_groundtruth_boxes_per_image', None)
num_det_boxes_per_image = eval_dict.get( num_det_boxes_per_image = eval_dict.get('num_det_boxes_per_image', None)
'num_groundtruth_boxes_per_image', None)
if groundtruth_is_crowd is None: if groundtruth_is_crowd is None:
groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool) groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
...@@ -491,6 +505,19 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -491,6 +505,19 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_classes])) detection_classes]))
self._image_ids_with_detections.update([image_id]) self._image_ids_with_detections.update([image_id])
def dump_detections_to_json_file(self, json_output_path):
"""Saves the detections into json_output_path in the format used by MS COCO.
Args:
json_output_path: String containing the output file's path. It can also be
None, in which case nothing will be written to the output file.
"""
if json_output_path:
tf.logging.info('Dumping detections to output json file.')
with tf.gfile.GFile(json_output_path, 'w') as fid:
json_utils.Dump(
obj=self._detection_masks_list, fid=fid, float_digits=4, indent=2)
def evaluate(self): def evaluate(self):
"""Evaluates the detection masks and returns a dictionary of coco metrics. """Evaluates the detection masks and returns a dictionary of coco metrics.
......
...@@ -24,14 +24,25 @@ from object_detection.core import standard_fields ...@@ -24,14 +24,25 @@ from object_detection.core import standard_fields
from object_detection.metrics import coco_evaluation from object_detection.metrics import coco_evaluation
def _get_categories_list():
return [{
'id': 1,
'name': 'person'
}, {
'id': 2,
'name': 'dog'
}, {
'id': 3,
'name': 'cat'
}]
class CocoDetectionEvaluationTest(tf.test.TestCase): class CocoDetectionEvaluationTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self): def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
"""Tests that mAP is calculated correctly on GT and Detections.""" """Tests that mAP is calculated correctly on GT and Detections."""
category_list = [{'id': 0, 'name': 'person'}, coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
{'id': 1, 'name': 'cat'}, _get_categories_list())
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
coco_evaluator.add_single_ground_truth_image_info( coco_evaluator.add_single_ground_truth_image_info(
image_id='image1', image_id='image1',
groundtruth_dict={ groundtruth_dict={
...@@ -88,17 +99,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase): ...@@ -88,17 +99,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetectionsSkipCrowd(self): def testGetOneMAPWithMatchingGroundtruthAndDetectionsSkipCrowd(self):
"""Tests computing mAP with is_crowd GT boxes skipped.""" """Tests computing mAP with is_crowd GT boxes skipped."""
category_list = [{ coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
'id': 0, _get_categories_list())
'name': 'person'
}, {
'id': 1,
'name': 'cat'
}, {
'id': 2,
'name': 'dog'
}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
coco_evaluator.add_single_ground_truth_image_info( coco_evaluator.add_single_ground_truth_image_info(
image_id='image1', image_id='image1',
groundtruth_dict={ groundtruth_dict={
...@@ -124,17 +126,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase): ...@@ -124,17 +126,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetectionsEmptyCrowd(self): def testGetOneMAPWithMatchingGroundtruthAndDetectionsEmptyCrowd(self):
"""Tests computing mAP with empty is_crowd array passed in.""" """Tests computing mAP with empty is_crowd array passed in."""
category_list = [{ coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
'id': 0, _get_categories_list())
'name': 'person'
}, {
'id': 1,
'name': 'cat'
}, {
'id': 2,
'name': 'dog'
}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
coco_evaluator.add_single_ground_truth_image_info( coco_evaluator.add_single_ground_truth_image_info(
image_id='image1', image_id='image1',
groundtruth_dict={ groundtruth_dict={
...@@ -160,11 +153,9 @@ class CocoDetectionEvaluationTest(tf.test.TestCase): ...@@ -160,11 +153,9 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
def testRejectionOnDuplicateGroundtruth(self): def testRejectionOnDuplicateGroundtruth(self):
"""Tests that groundtruth cannot be added more than once for an image.""" """Tests that groundtruth cannot be added more than once for an image."""
categories = [{'id': 1, 'name': 'cat'}, coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
{'id': 2, 'name': 'dog'}, _get_categories_list())
{'id': 3, 'name': 'elephant'}]
# Add groundtruth # Add groundtruth
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories)
image_key1 = 'img1' image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float) dtype=float)
...@@ -189,11 +180,9 @@ class CocoDetectionEvaluationTest(tf.test.TestCase): ...@@ -189,11 +180,9 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
def testRejectionOnDuplicateDetections(self): def testRejectionOnDuplicateDetections(self):
"""Tests that detections cannot be added more than once for an image.""" """Tests that detections cannot be added more than once for an image."""
categories = [{'id': 1, 'name': 'cat'}, coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
{'id': 2, 'name': 'dog'}, _get_categories_list())
{'id': 3, 'name': 'elephant'}]
# Add groundtruth # Add groundtruth
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories)
coco_evaluator.add_single_ground_truth_image_info( coco_evaluator.add_single_ground_truth_image_info(
image_id='image1', image_id='image1',
groundtruth_dict={ groundtruth_dict={
...@@ -227,10 +216,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase): ...@@ -227,10 +216,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
def testExceptionRaisedWithMissingGroundtruth(self): def testExceptionRaisedWithMissingGroundtruth(self):
"""Tests that exception is raised for detection with missing groundtruth.""" """Tests that exception is raised for detection with missing groundtruth."""
categories = [{'id': 1, 'name': 'cat'}, coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
{'id': 2, 'name': 'dog'}, _get_categories_list())
{'id': 3, 'name': 'elephant'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories)
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
coco_evaluator.add_single_detected_image_info( coco_evaluator.add_single_detected_image_info(
image_id='image1', image_id='image1',
...@@ -247,10 +234,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase): ...@@ -247,10 +234,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
class CocoEvaluationPyFuncTest(tf.test.TestCase): class CocoEvaluationPyFuncTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self): def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
category_list = [{'id': 0, 'name': 'person'}, coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
{'id': 1, 'name': 'cat'}, _get_categories_list())
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
image_id = tf.placeholder(tf.string, shape=()) image_id = tf.placeholder(tf.string, shape=())
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
...@@ -310,31 +295,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase): ...@@ -310,31 +295,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-1.0) 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-1.0) 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list) self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list) self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids) self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self): def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self):
category_list = [{ coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
'id': 0, _get_categories_list())
'name': 'person'
}, {
'id': 1,
'name': 'cat'
}, {
'id': 2,
'name': 'dog'
}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
image_id = tf.placeholder(tf.string, shape=()) image_id = tf.placeholder(tf.string, shape=())
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
...@@ -415,24 +391,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase): ...@@ -415,24 +391,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-1.0) 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.83333331)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-1.0) 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list) self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list) self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids) self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self): def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
category_list = [{'id': 0, 'name': 'person'}, coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
{'id': 1, 'name': 'cat'}, _get_categories_list())
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
batch_size = 3 batch_size = 3
image_id = tf.placeholder(tf.string, shape=(batch_size)) image_id = tf.placeholder(tf.string, shape=(batch_size))
groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4)) groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
...@@ -479,24 +453,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase): ...@@ -479,24 +453,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-1.0) 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-1.0) 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list) self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list) self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids) self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsPaddedBatches(self): def testGetOneMAPWithMatchingGroundtruthAndDetectionsPaddedBatches(self):
category_list = [{'id': 0, 'name': 'person'}, coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
{'id': 1, 'name': 'cat'}, _get_categories_list())
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
batch_size = 3 batch_size = 3
image_id = tf.placeholder(tf.string, shape=(batch_size)) image_id = tf.placeholder(tf.string, shape=(batch_size))
groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4)) groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
...@@ -525,27 +497,40 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase): ...@@ -525,27 +497,40 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
_, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
with self.test_session() as sess: with self.test_session() as sess:
sess.run(update_op, sess.run(
feed_dict={ update_op,
image_id: ['image1', 'image2', 'image3'], feed_dict={
groundtruth_boxes: np.array([[[100., 100., 200., 200.], image_id: ['image1', 'image2', 'image3'],
[-1, -1, -1, -1]], groundtruth_boxes:
[[50., 50., 100., 100.], np.array([[[100., 100., 200., 200.], [-1, -1, -1, -1]],
[-1, -1, -1, -1]], [[50., 50., 100., 100.], [-1, -1, -1, -1]],
[[25., 25., 50., 50.], [[25., 25., 50., 50.], [10., 10., 15., 15.]]]),
[10., 10., 15., 15.]]]), groundtruth_classes:
groundtruth_classes: np.array([[1, -1], [3, -1], [2, 2]]), np.array([[1, -1], [3, -1], [2, 2]]),
num_gt_boxes_per_image: np.array([1, 1, 2]), num_gt_boxes_per_image:
detection_boxes: np.array([[[100., 100., 200., 200.], np.array([1, 1, 2]),
[0., 0., 0., 0.]], detection_boxes:
[[50., 50., 100., 100.], np.array([[[100., 100., 200., 200.],
[0., 0., 0., 0.]], [0., 0., 0., 0.],
[[25., 25., 50., 50.], [0., 0., 0., 0.]],
[10., 10., 15., 15.]]]), [[50., 50., 100., 100.],
detection_scores: np.array([[.8, 0.], [.7, 0.], [.95, .9]]), [0., 0., 0., 0.],
detection_classes: np.array([[1, -1], [3, -1], [2, 2]]), [0., 0., 0., 0.]],
num_det_boxes_per_image: np.array([1, 1, 2]), [[25., 25., 50., 50.],
}) [10., 10., 15., 15.],
[10., 10., 15., 15.]]]),
detection_scores:
np.array([[.8, 0., 0.], [.7, 0., 0.], [.95, .9, 0.9]]),
detection_classes:
np.array([[1, -1, -1], [3, -1, -1], [2, 2, 2]]),
num_det_boxes_per_image:
np.array([1, 1, 3]),
})
# Check the number of bounding boxes added.
self.assertEqual(len(coco_evaluator._groundtruth_list), 4)
self.assertEqual(len(coco_evaluator._detection_boxes_list), 5)
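# The counts follow from the padded feed above: num_gt_boxes_per_image = [1, 1, 2]
# trims the padded groundtruth rows to 1 + 1 + 2 = 4 boxes, while
# num_det_boxes_per_image = [1, 1, 3] trims the padded detections to 1 + 1 + 3 = 5.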
metrics = {} metrics = {}
for key, (value_op, _) in eval_metric_ops.iteritems(): for key, (value_op, _) in eval_metric_ops.iteritems():
metrics[key] = value_op metrics[key] = value_op
...@@ -555,14 +540,14 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase): ...@@ -555,14 +540,14 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-1.0) 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.83333331)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-1.0) 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list) self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list) self.assertFalse(coco_evaluator._detection_boxes_list)
...@@ -572,10 +557,7 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase): ...@@ -572,10 +557,7 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
class CocoMaskEvaluationTest(tf.test.TestCase): class CocoMaskEvaluationTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self): def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
category_list = [{'id': 0, 'name': 'person'}, coco_evaluator = coco_evaluation.CocoMaskEvaluator(_get_categories_list())
{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoMaskEvaluator(category_list)
coco_evaluator.add_single_ground_truth_image_info( coco_evaluator.add_single_ground_truth_image_info(
image_id='image1', image_id='image1',
groundtruth_dict={ groundtruth_dict={
...@@ -657,10 +639,7 @@ class CocoMaskEvaluationTest(tf.test.TestCase): ...@@ -657,10 +639,7 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self): def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
category_list = [{'id': 0, 'name': 'person'}, coco_evaluator = coco_evaluation.CocoMaskEvaluator(_get_categories_list())
{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoMaskEvaluator(category_list)
image_id = tf.placeholder(tf.string, shape=()) image_id = tf.placeholder(tf.string, shape=())
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
...@@ -756,5 +735,6 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): ...@@ -756,5 +735,6 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._image_id_to_mask_shape_map) self.assertFalse(coco_evaluator._image_id_to_mask_shape_map)
self.assertFalse(coco_evaluator._detection_masks_list) self.assertFalse(coco_evaluator._detection_masks_list)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -91,10 +91,8 @@ def read_data_and_evaluate(input_config, eval_config): ...@@ -91,10 +91,8 @@ def read_data_and_evaluate(input_config, eval_config):
if input_config.WhichOneof('input_reader') == 'tf_record_input_reader': if input_config.WhichOneof('input_reader') == 'tf_record_input_reader':
input_paths = input_config.tf_record_input_reader.input_path input_paths = input_config.tf_record_input_reader.input_path
label_map = label_map_util.load_labelmap(input_config.label_map_path) categories = label_map_util.create_categories_from_labelmap(
max_num_classes = max([item.id for item in label_map.item]) input_config.label_map_path)
categories = label_map_util.convert_label_map_to_categories(
label_map, max_num_classes)
object_detection_evaluators = evaluator.get_evaluators( object_detection_evaluators = evaluator.get_evaluators(
eval_config, categories) eval_config, categories)
......
...@@ -18,6 +18,7 @@ from __future__ import absolute_import ...@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import copy
import functools import functools
import os import os
...@@ -43,9 +44,12 @@ MODEL_BUILD_UTIL_MAP = { ...@@ -43,9 +44,12 @@ MODEL_BUILD_UTIL_MAP = {
config_util.create_pipeline_proto_from_configs, config_util.create_pipeline_proto_from_configs,
'merge_external_params_with_configs': 'merge_external_params_with_configs':
config_util.merge_external_params_with_configs, config_util.merge_external_params_with_configs,
'create_train_input_fn': inputs.create_train_input_fn, 'create_train_input_fn':
'create_eval_input_fn': inputs.create_eval_input_fn, inputs.create_train_input_fn,
'create_predict_input_fn': inputs.create_predict_input_fn, 'create_eval_input_fn':
inputs.create_eval_input_fn,
'create_predict_input_fn':
inputs.create_predict_input_fn,
} }
...@@ -126,8 +130,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True): ...@@ -126,8 +130,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
ValueError: If unpad_tensors is True and `tensor_dict` does not contain ValueError: If unpad_tensors is True and `tensor_dict` does not contain
`num_groundtruth_boxes` tensor. `num_groundtruth_boxes` tensor.
""" """
unbatched_tensor_dict = {key: tf.unstack(tensor) unbatched_tensor_dict = {
for key, tensor in tensor_dict.items()} key: tf.unstack(tensor) for key, tensor in tensor_dict.items()
}
if unpad_groundtruth_tensors: if unpad_groundtruth_tensors:
if (fields.InputDataFields.num_groundtruth_boxes not in if (fields.InputDataFields.num_groundtruth_boxes not in
unbatched_tensor_dict): unbatched_tensor_dict):
...@@ -206,8 +211,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -206,8 +211,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
# Make sure to set the Keras learning phase. True during training, # Make sure to set the Keras learning phase. True during training,
# False for inference. # False for inference.
tf.keras.backend.set_learning_phase(is_training) tf.keras.backend.set_learning_phase(is_training)
detection_model = detection_model_fn(is_training=is_training, detection_model = detection_model_fn(
add_summaries=(not use_tpu)) is_training=is_training, add_summaries=(not use_tpu))
scaffold_fn = None scaffold_fn = None
if mode == tf.estimator.ModeKeys.TRAIN: if mode == tf.estimator.ModeKeys.TRAIN:
...@@ -237,6 +242,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -237,6 +242,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
gt_weights_list = None gt_weights_list = None
if fields.InputDataFields.groundtruth_weights in labels: if fields.InputDataFields.groundtruth_weights in labels:
gt_weights_list = labels[fields.InputDataFields.groundtruth_weights] gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
gt_is_crowd_list = None
if fields.InputDataFields.groundtruth_is_crowd in labels: if fields.InputDataFields.groundtruth_is_crowd in labels:
gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd] gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
detection_model.provide_groundtruth( detection_model.provide_groundtruth(
...@@ -248,8 +254,18 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -248,8 +254,18 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
groundtruth_is_crowd_list=gt_is_crowd_list) groundtruth_is_crowd_list=gt_is_crowd_list)
preprocessed_images = features[fields.InputDataFields.image] preprocessed_images = features[fields.InputDataFields.image]
prediction_dict = detection_model.predict( if use_tpu and train_config.use_bfloat16:
preprocessed_images, features[fields.InputDataFields.true_image_shape]) with tf.contrib.tpu.bfloat16_scope():
prediction_dict = detection_model.predict(
preprocessed_images,
features[fields.InputDataFields.true_image_shape])
for k, v in prediction_dict.items():
if v.dtype == tf.bfloat16:
prediction_dict[k] = tf.cast(v, tf.float32)
else:
prediction_dict = detection_model.predict(
preprocessed_images,
features[fields.InputDataFields.true_image_shape])
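The hunk above runs prediction under a TPU bfloat16 scope when train_config.use_bfloat16 is set and casts any bfloat16 outputs back to float32. A minimal sketch of the same pattern, assuming a predict_fn that returns a dict of tensors (function and argument names are illustrative):

import tensorflow as tf

def predict_maybe_bfloat16(predict_fn, images, use_bfloat16):
  """Runs predict_fn, optionally under a bfloat16 scope, returning float32 tensors."""
  if not use_bfloat16:
    return predict_fn(images)
  with tf.contrib.tpu.bfloat16_scope():
    prediction_dict = predict_fn(images)
  # Cast bfloat16 outputs back so downstream losses and metrics stay in float32.
  return {k: tf.cast(v, tf.float32) if v.dtype == tf.bfloat16 else v
          for k, v in prediction_dict.items()}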
if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT): if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
detections = detection_model.postprocess( detections = detection_model.postprocess(
prediction_dict, features[fields.InputDataFields.true_image_shape]) prediction_dict, features[fields.InputDataFields.true_image_shape])
...@@ -270,13 +286,16 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -270,13 +286,16 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
train_config.load_all_detection_checkpoint_vars)) train_config.load_all_detection_checkpoint_vars))
available_var_map = ( available_var_map = (
variables_helper.get_variables_available_in_checkpoint( variables_helper.get_variables_available_in_checkpoint(
asg_map, train_config.fine_tune_checkpoint, asg_map,
train_config.fine_tune_checkpoint,
include_global_step=False)) include_global_step=False))
if use_tpu: if use_tpu:
def tpu_scaffold(): def tpu_scaffold():
tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
available_var_map) available_var_map)
return tf.train.Scaffold() return tf.train.Scaffold()
scaffold_fn = tpu_scaffold scaffold_fn = tpu_scaffold
else: else:
tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
...@@ -290,8 +309,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -290,8 +309,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
regularization_losses = tf.get_collection( regularization_losses = tf.get_collection(
tf.GraphKeys.REGULARIZATION_LOSSES) tf.GraphKeys.REGULARIZATION_LOSSES)
if regularization_losses: if regularization_losses:
regularization_loss = tf.add_n(regularization_losses, regularization_loss = tf.add_n(
name='regularization_loss') regularization_losses, name='regularization_loss')
losses.append(regularization_loss) losses.append(regularization_loss)
losses_dict['Loss/regularization_loss'] = regularization_loss losses_dict['Loss/regularization_loss'] = regularization_loss
total_loss = tf.add_n(losses, name='total_loss') total_loss = tf.add_n(losses, name='total_loss')
...@@ -353,14 +372,19 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -353,14 +372,19 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
eval_metric_ops = None eval_metric_ops = None
scaffold = None scaffold = None
if mode == tf.estimator.ModeKeys.EVAL: if mode == tf.estimator.ModeKeys.EVAL:
class_agnostic = (fields.DetectionResultFields.detection_classes class_agnostic = (
not in detections) fields.DetectionResultFields.detection_classes not in detections)
groundtruth = _prepare_groundtruth_for_eval( groundtruth = _prepare_groundtruth_for_eval(detection_model,
detection_model, class_agnostic) class_agnostic)
use_original_images = fields.InputDataFields.original_image in features use_original_images = fields.InputDataFields.original_image in features
eval_images = ( if use_original_images:
features[fields.InputDataFields.original_image] if use_original_images eval_images = tf.cast(tf.image.resize_bilinear(
else features[fields.InputDataFields.image]) features[fields.InputDataFields.original_image][0:1],
features[fields.InputDataFields.original_image_spatial_shape][0]),
tf.uint8)
else:
eval_images = features[fields.InputDataFields.image]
eval_dict = eval_util.result_dict_for_single_example( eval_dict = eval_util.result_dict_for_single_example(
eval_images[0:1], eval_images[0:1],
features[inputs.HASH_KEY][0], features[inputs.HASH_KEY][0],
...@@ -374,28 +398,26 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -374,28 +398,26 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
else: else:
category_index = label_map_util.create_category_index_from_labelmap( category_index = label_map_util.create_category_index_from_labelmap(
eval_input_config.label_map_path) eval_input_config.label_map_path)
img_summary = None vis_metric_ops = None
if not use_tpu and use_original_images: if not use_tpu and use_original_images:
detection_and_groundtruth = ( eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
vis_utils.draw_side_by_side_evaluation_image( category_index,
eval_dict, category_index, max_boxes_to_draw=eval_config.max_num_boxes_to_visualize, max_examples_to_draw=eval_config.num_visualizations,
min_score_thresh=eval_config.min_score_threshold, max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
use_normalized_coordinates=False)) min_score_thresh=eval_config.min_score_threshold,
img_summary = tf.summary.image('Detections_Left_Groundtruth_Right', use_normalized_coordinates=False)
detection_and_groundtruth) vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(
eval_dict)
# Eval metrics on a single example. # Eval metrics on a single example.
eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators( eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_config, eval_config, category_index.values(), eval_dict)
category_index.values(),
eval_dict)
for loss_key, loss_tensor in iter(losses_dict.items()): for loss_key, loss_tensor in iter(losses_dict.items()):
eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor) eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
for var in optimizer_summary_vars: for var in optimizer_summary_vars:
eval_metric_ops[var.op.name] = (var, tf.no_op()) eval_metric_ops[var.op.name] = (var, tf.no_op())
if img_summary is not None: if vis_metric_ops is not None:
eval_metric_ops['Detections_Left_Groundtruth_Right'] = ( eval_metric_ops.update(vis_metric_ops)
img_summary, tf.no_op())
eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()} eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}
if eval_config.use_moving_averages: if eval_config.use_moving_averages:
...@@ -435,12 +457,14 @@ def create_estimator_and_inputs(run_config, ...@@ -435,12 +457,14 @@ def create_estimator_and_inputs(run_config,
hparams, hparams,
pipeline_config_path, pipeline_config_path,
train_steps=None, train_steps=None,
eval_steps=None, sample_1_of_n_eval_examples=1,
sample_1_of_n_eval_on_train_examples=1,
model_fn_creator=create_model_fn, model_fn_creator=create_model_fn,
use_tpu_estimator=False, use_tpu_estimator=False,
use_tpu=False, use_tpu=False,
num_shards=1, num_shards=1,
params=None, params=None,
override_eval_num_epochs=True,
**kwargs): **kwargs):
"""Creates `Estimator`, input functions, and steps. """Creates `Estimator`, input functions, and steps.
...@@ -450,8 +474,11 @@ def create_estimator_and_inputs(run_config, ...@@ -450,8 +474,11 @@ def create_estimator_and_inputs(run_config,
pipeline_config_path: A path to a pipeline config file. pipeline_config_path: A path to a pipeline config file.
train_steps: Number of training steps. If None, the number of training steps train_steps: Number of training steps. If None, the number of training steps
is set from the `TrainConfig` proto. is set from the `TrainConfig` proto.
eval_steps: Number of evaluation steps per evaluation cycle. If None, the sample_1_of_n_eval_examples: Integer representing how often an eval example
number of evaluation steps is set from the `EvalConfig` proto. should be sampled. If 1, will sample all examples.
sample_1_of_n_eval_on_train_examples: Similar to
`sample_1_of_n_eval_examples`, except controls the sampling of training
data for evaluation.
model_fn_creator: A function that creates a `model_fn` for `Estimator`. model_fn_creator: A function that creates a `model_fn` for `Estimator`.
Follows the signature: Follows the signature:
...@@ -470,19 +497,20 @@ def create_estimator_and_inputs(run_config, ...@@ -470,19 +497,20 @@ def create_estimator_and_inputs(run_config,
is True. is True.
params: Parameter dictionary passed from the estimator. Only used if params: Parameter dictionary passed from the estimator. Only used if
`use_tpu_estimator` is True. `use_tpu_estimator` is True.
override_eval_num_epochs: Whether to overwrite the number of epochs to
1 for eval_input.
**kwargs: Additional keyword arguments for configuration override. **kwargs: Additional keyword arguments for configuration override.
Returns: Returns:
A dictionary with the following fields: A dictionary with the following fields:
'estimator': An `Estimator` or `TPUEstimator`. 'estimator': An `Estimator` or `TPUEstimator`.
'train_input_fn': A training input function. 'train_input_fn': A training input function.
'eval_input_fn': An evaluation input function. 'eval_input_fns': A list of all evaluation input functions.
'eval_input_names': A list of names for each evaluation input.
'eval_on_train_input_fn': An evaluation-on-train input function. 'eval_on_train_input_fn': An evaluation-on-train input function.
'predict_input_fn': A prediction input function. 'predict_input_fn': A prediction input function.
'train_steps': Number of training steps. Either directly from input or from 'train_steps': Number of training steps. Either directly from input or from
configuration. configuration.
'eval_steps': Number of evaluation steps. Either directly from input or from
configuration.
""" """
get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[ get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
'get_configs_from_pipeline_file'] 'get_configs_from_pipeline_file']
...@@ -495,27 +523,36 @@ def create_estimator_and_inputs(run_config, ...@@ -495,27 +523,36 @@ def create_estimator_and_inputs(run_config,
create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn'] create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn']
configs = get_configs_from_pipeline_file(pipeline_config_path) configs = get_configs_from_pipeline_file(pipeline_config_path)
kwargs.update({
'train_steps': train_steps,
'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
})
if override_eval_num_epochs:
kwargs.update({'eval_num_epochs': 1})
tf.logging.warning(
'Forced number of epochs for all eval validations to be 1.')
configs = merge_external_params_with_configs( configs = merge_external_params_with_configs(
configs, configs, hparams, kwargs_dict=kwargs)
hparams,
train_steps=train_steps,
eval_steps=eval_steps,
retain_original_images_in_eval=False if use_tpu else True,
**kwargs)
model_config = configs['model'] model_config = configs['model']
train_config = configs['train_config'] train_config = configs['train_config']
train_input_config = configs['train_input_config'] train_input_config = configs['train_input_config']
eval_config = configs['eval_config'] eval_config = configs['eval_config']
eval_input_config = configs['eval_input_config'] eval_input_configs = configs['eval_input_configs']
eval_on_train_input_config = copy.deepcopy(train_input_config)
eval_on_train_input_config.sample_1_of_n_examples = (
sample_1_of_n_eval_on_train_examples)
if override_eval_num_epochs and eval_on_train_input_config.num_epochs != 1:
tf.logging.warning('Expected number of evaluation epochs is 1, but '
'instead encountered `eval_on_train_input_config'
'.num_epochs` = '
'{}. Overwriting `num_epochs` to 1.'.format(
eval_on_train_input_config.num_epochs))
eval_on_train_input_config.num_epochs = 1
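# Illustrative note (not part of the original diff): forcing `num_epochs` to 1
# matters because the eval passes below run with `steps=None`, i.e. until the
# eval input function raises tf.errors.OutOfRangeError. A repeating or
# multi-epoch eval-on-train dataset would never raise it, so a single
# evaluation call would not terminate.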
# update train_steps from config but only when non-zero value is provided # update train_steps from config but only when non-zero value is provided
if train_steps is None and train_config.num_steps != 0: if train_steps is None and train_config.num_steps != 0:
train_steps = train_config.num_steps train_steps = train_config.num_steps
# update eval_steps from config but only when non-zero value is provided
if eval_steps is None and eval_config.num_examples != 0:
eval_steps = eval_config.num_examples
detection_model_fn = functools.partial( detection_model_fn = functools.partial(
model_builder.build, model_config=model_config) model_builder.build, model_config=model_config)
...@@ -524,18 +561,25 @@ def create_estimator_and_inputs(run_config, ...@@ -524,18 +561,25 @@ def create_estimator_and_inputs(run_config,
train_config=train_config, train_config=train_config,
train_input_config=train_input_config, train_input_config=train_input_config,
model_config=model_config) model_config=model_config)
eval_input_fn = create_eval_input_fn( eval_input_fns = [
eval_config=eval_config, create_eval_input_fn(
eval_input_config=eval_input_config, eval_config=eval_config,
model_config=model_config) eval_input_config=eval_input_config,
model_config=model_config) for eval_input_config in eval_input_configs
]
eval_input_names = [
eval_input_config.name for eval_input_config in eval_input_configs
]
eval_on_train_input_fn = create_eval_input_fn( eval_on_train_input_fn = create_eval_input_fn(
eval_config=eval_config, eval_config=eval_config,
eval_input_config=train_input_config, eval_input_config=eval_on_train_input_config,
model_config=model_config) model_config=model_config)
predict_input_fn = create_predict_input_fn( predict_input_fn = create_predict_input_fn(
model_config=model_config, predict_input_config=eval_input_config) model_config=model_config, predict_input_config=eval_input_configs[0])
tf.logging.info('create_estimator_and_inputs: use_tpu %s', use_tpu) export_to_tpu = hparams.get('export_to_tpu', False)
tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
use_tpu, export_to_tpu)
model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu) model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu)
if use_tpu_estimator: if use_tpu_estimator:
estimator = tf.contrib.tpu.TPUEstimator( estimator = tf.contrib.tpu.TPUEstimator(
...@@ -552,89 +596,85 @@ def create_estimator_and_inputs(run_config, ...@@ -552,89 +596,85 @@ def create_estimator_and_inputs(run_config,
# Write the as-run pipeline config to disk. # Write the as-run pipeline config to disk.
if run_config.is_chief: if run_config.is_chief:
pipeline_config_final = create_pipeline_proto_from_configs( pipeline_config_final = create_pipeline_proto_from_configs(configs)
configs)
config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir) config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir)
return dict( return dict(
estimator=estimator, estimator=estimator,
train_input_fn=train_input_fn, train_input_fn=train_input_fn,
eval_input_fn=eval_input_fn, eval_input_fns=eval_input_fns,
eval_input_names=eval_input_names,
eval_on_train_input_fn=eval_on_train_input_fn, eval_on_train_input_fn=eval_on_train_input_fn,
predict_input_fn=predict_input_fn, predict_input_fn=predict_input_fn,
train_steps=train_steps, train_steps=train_steps)
eval_steps=eval_steps)
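# A minimal usage sketch (not part of the original diff). The run_config,
# hparams and pipeline path below are placeholders; the dict keys read out are
# the ones documented in the docstring above.
train_and_eval_dict = create_estimator_and_inputs(
    run_config=tf.estimator.RunConfig(model_dir='/tmp/model_dir'),  # placeholder
    hparams=hparams,  # e.g. model_hparams.create_hparams(None)
    pipeline_config_path='/tmp/pipeline.config',  # placeholder
    train_steps=None,
    sample_1_of_n_eval_examples=1)
estimator = train_and_eval_dict['estimator']
train_input_fn = train_and_eval_dict['train_input_fn']
eval_input_fns = train_and_eval_dict['eval_input_fns']  # one per eval input config
eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
predict_input_fn = train_and_eval_dict['predict_input_fn']
train_steps = train_and_eval_dict['train_steps']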
def create_train_and_eval_specs(train_input_fn, def create_train_and_eval_specs(train_input_fn,
eval_input_fn, eval_input_fns,
eval_on_train_input_fn, eval_on_train_input_fn,
predict_input_fn, predict_input_fn,
train_steps, train_steps,
eval_steps,
eval_on_train_data=False, eval_on_train_data=False,
eval_on_train_steps=None,
final_exporter_name='Servo', final_exporter_name='Servo',
eval_spec_name='eval'): eval_spec_names=None):
"""Creates a `TrainSpec` and `EvalSpec`s. """Creates a `TrainSpec` and `EvalSpec`s.
Args: Args:
train_input_fn: Function that produces features and labels on train data. train_input_fn: Function that produces features and labels on train data.
eval_input_fn: Function that produces features and labels on eval data. eval_input_fns: A list of functions that produce features and labels on eval
data.
eval_on_train_input_fn: Function that produces features and labels for eval_on_train_input_fn: Function that produces features and labels for
evaluation on train data. evaluation on train data.
predict_input_fn: Function that produces features for inference. predict_input_fn: Function that produces features for inference.
train_steps: Number of training steps. train_steps: Number of training steps.
eval_steps: Number of eval steps.
eval_on_train_data: Whether to evaluate model on training data. Default is eval_on_train_data: Whether to evaluate model on training data. Default is
False. False.
eval_on_train_steps: Number of eval steps for training data. If not given,
uses eval_steps.
final_exporter_name: String name given to `FinalExporter`. final_exporter_name: String name given to `FinalExporter`.
eval_spec_name: String name given to main `EvalSpec`. eval_spec_names: A list of string names for each `EvalSpec`.
Returns: Returns:
Tuple of `TrainSpec` and list of `EvalSpecs`. The first `EvalSpec` is for Tuple of `TrainSpec` and list of `EvalSpecs`. If `eval_on_train_data` is
evaluation data. If `eval_on_train_data` is True, the second `EvalSpec` in True, the last `EvalSpec` in the list will correspond to training data. The
the list will correspond to training data. rest of the EvalSpecs in the list correspond to evaluation data.
""" """
exporter = tf.estimator.FinalExporter(
name=final_exporter_name, serving_input_receiver_fn=predict_input_fn)
train_spec = tf.estimator.TrainSpec( train_spec = tf.estimator.TrainSpec(
input_fn=train_input_fn, max_steps=train_steps) input_fn=train_input_fn, max_steps=train_steps)
eval_specs = [ if eval_spec_names is None:
tf.estimator.EvalSpec( eval_spec_names = range(len(eval_input_fns))
name=eval_spec_name,
input_fn=eval_input_fn, eval_specs = []
steps=eval_steps, for eval_spec_name, eval_input_fn in zip(eval_spec_names, eval_input_fns):
exporters=exporter) exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
] exporter = tf.estimator.FinalExporter(
name=exporter_name, serving_input_receiver_fn=predict_input_fn)
eval_specs.append(
tf.estimator.EvalSpec(
name=eval_spec_name,
input_fn=eval_input_fn,
steps=None,
exporters=exporter))
if eval_on_train_data: if eval_on_train_data:
eval_specs.append( eval_specs.append(
tf.estimator.EvalSpec( tf.estimator.EvalSpec(
name='eval_on_train', input_fn=eval_on_train_input_fn, name='eval_on_train', input_fn=eval_on_train_input_fn, steps=None))
steps=eval_on_train_steps or eval_steps))
return train_spec, eval_specs return train_spec, eval_specs
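# A minimal usage sketch (not part of the original diff), matching the flow in
# model_main.py further below: two hypothetical eval inputs yield two named
# EvalSpecs, plus an optional eval-on-train spec.
train_spec, eval_specs = create_train_and_eval_specs(
    train_input_fn,
    eval_input_fns,  # e.g. [holdout_input_fn, extra_dataset_input_fn]
    eval_on_train_input_fn,
    predict_input_fn,
    train_steps,
    eval_on_train_data=True,
    final_exporter_name='Servo',
    eval_spec_names=['holdout', 'extra'])
# Currently only a single EvalSpec can be passed to train_and_evaluate.
tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])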
def continuous_eval(estimator, model_dir, input_fn, eval_steps, train_steps, def continuous_eval(estimator, model_dir, input_fn, train_steps, name):
name):
"""Perform continuous evaluation on checkpoints written to a model directory. """Perform continuous evaluation on checkpoints written to a model directory.
Args: Args:
estimator: Estimator object to use for evaluation. estimator: Estimator object to use for evaluation.
model_dir: Model directory to read checkpoints for continuous evaluation. model_dir: Model directory to read checkpoints for continuous evaluation.
input_fn: Input function to use for evaluation. input_fn: Input function to use for evaluation.
eval_steps: Number of steps to run during each evaluation.
train_steps: Number of training steps. This is used to infer the last train_steps: Number of training steps. This is used to infer the last
checkpoint and stop evaluation loop. checkpoint and stop evaluation loop.
name: Namescope for eval summary. name: Namescope for eval summary.
""" """
def terminate_eval(): def terminate_eval():
tf.logging.info('Terminating eval after 180 seconds of no checkpoints') tf.logging.info('Terminating eval after 180 seconds of no checkpoints')
return True return True
...@@ -646,10 +686,7 @@ def continuous_eval(estimator, model_dir, input_fn, eval_steps, train_steps, ...@@ -646,10 +686,7 @@ def continuous_eval(estimator, model_dir, input_fn, eval_steps, train_steps,
tf.logging.info('Starting Evaluation.') tf.logging.info('Starting Evaluation.')
try: try:
eval_results = estimator.evaluate( eval_results = estimator.evaluate(
input_fn=input_fn, input_fn=input_fn, steps=None, checkpoint_path=ckpt, name=name)
steps=eval_steps,
checkpoint_path=ckpt,
name=name)
tf.logging.info('Eval results: %s' % eval_results) tf.logging.info('Eval results: %s' % eval_results)
# Terminate eval job when final checkpoint is reached # Terminate eval job when final checkpoint is reached
...@@ -713,10 +750,9 @@ def populate_experiment(run_config, ...@@ -713,10 +750,9 @@ def populate_experiment(run_config,
**kwargs) **kwargs)
estimator = train_and_eval_dict['estimator'] estimator = train_and_eval_dict['estimator']
train_input_fn = train_and_eval_dict['train_input_fn'] train_input_fn = train_and_eval_dict['train_input_fn']
eval_input_fn = train_and_eval_dict['eval_input_fn'] eval_input_fns = train_and_eval_dict['eval_input_fns']
predict_input_fn = train_and_eval_dict['predict_input_fn'] predict_input_fn = train_and_eval_dict['predict_input_fn']
train_steps = train_and_eval_dict['train_steps'] train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
export_strategies = [ export_strategies = [
tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy( tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
...@@ -726,8 +762,9 @@ def populate_experiment(run_config, ...@@ -726,8 +762,9 @@ def populate_experiment(run_config,
return tf.contrib.learn.Experiment( return tf.contrib.learn.Experiment(
estimator=estimator, estimator=estimator,
train_input_fn=train_input_fn, train_input_fn=train_input_fn,
eval_input_fn=eval_input_fn, eval_input_fn=eval_input_fns[0],
train_steps=train_steps, train_steps=train_steps,
eval_steps=eval_steps, eval_steps=None,
export_strategies=export_strategies, export_strategies=export_strategies,
eval_delay_secs=120,) eval_delay_secs=120,
)
...@@ -64,11 +64,13 @@ def _get_configs_for_model(model_name): ...@@ -64,11 +64,13 @@ def _get_configs_for_model(model_name):
data_path = _get_data_path() data_path = _get_data_path()
label_map_path = _get_labelmap_path() label_map_path = _get_labelmap_path()
configs = config_util.get_configs_from_pipeline_file(filename) configs = config_util.get_configs_from_pipeline_file(filename)
override_dict = {
'train_input_path': data_path,
'eval_input_path': data_path,
'label_map_path': label_map_path
}
configs = config_util.merge_external_params_with_configs( configs = config_util.merge_external_params_with_configs(
configs, configs, kwargs_dict=override_dict)
train_input_path=data_path,
eval_input_path=data_path,
label_map_path=label_map_path)
return configs return configs
...@@ -145,6 +147,9 @@ class ModelLibTest(tf.test.TestCase): ...@@ -145,6 +147,9 @@ class ModelLibTest(tf.test.TestCase):
self.assertEqual(batch_size, detection_scores.shape.as_list()[0]) self.assertEqual(batch_size, detection_scores.shape.as_list()[0])
self.assertEqual(tf.float32, detection_scores.dtype) self.assertEqual(tf.float32, detection_scores.dtype)
self.assertEqual(tf.float32, num_detections.dtype) self.assertEqual(tf.float32, num_detections.dtype)
if mode == 'eval':
self.assertIn('Detections_Left_Groundtruth_Right/0',
estimator_spec.eval_metric_ops)
if model_mode == tf.estimator.ModeKeys.TRAIN: if model_mode == tf.estimator.ModeKeys.TRAIN:
self.assertIsNotNone(estimator_spec.train_op) self.assertIsNotNone(estimator_spec.train_op)
return estimator_spec return estimator_spec
...@@ -225,21 +230,17 @@ class ModelLibTest(tf.test.TestCase): ...@@ -225,21 +230,17 @@ class ModelLibTest(tf.test.TestCase):
hparams_overrides='load_pretrained=false') hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20 train_steps = 20
eval_steps = 10
train_and_eval_dict = model_lib.create_estimator_and_inputs( train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config, run_config,
hparams, hparams,
pipeline_config_path, pipeline_config_path,
train_steps=train_steps, train_steps=train_steps)
eval_steps=eval_steps)
estimator = train_and_eval_dict['estimator'] estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps'] train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tf.estimator.Estimator) self.assertIsInstance(estimator, tf.estimator.Estimator)
self.assertEqual(20, train_steps) self.assertEqual(20, train_steps)
self.assertEqual(10, eval_steps)
self.assertIn('train_input_fn', train_and_eval_dict) self.assertIn('train_input_fn', train_and_eval_dict)
self.assertIn('eval_input_fn', train_and_eval_dict) self.assertIn('eval_input_fns', train_and_eval_dict)
self.assertIn('eval_on_train_input_fn', train_and_eval_dict) self.assertIn('eval_on_train_input_fn', train_and_eval_dict)
def test_create_estimator_with_default_train_eval_steps(self): def test_create_estimator_with_default_train_eval_steps(self):
...@@ -250,16 +251,13 @@ class ModelLibTest(tf.test.TestCase): ...@@ -250,16 +251,13 @@ class ModelLibTest(tf.test.TestCase):
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
config_train_steps = configs['train_config'].num_steps config_train_steps = configs['train_config'].num_steps
config_eval_steps = configs['eval_config'].num_examples
train_and_eval_dict = model_lib.create_estimator_and_inputs( train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config, hparams, pipeline_config_path) run_config, hparams, pipeline_config_path)
estimator = train_and_eval_dict['estimator'] estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps'] train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tf.estimator.Estimator) self.assertIsInstance(estimator, tf.estimator.Estimator)
self.assertEqual(config_train_steps, train_steps) self.assertEqual(config_train_steps, train_steps)
self.assertEqual(config_eval_steps, eval_steps)
def test_create_tpu_estimator_and_inputs(self): def test_create_tpu_estimator_and_inputs(self):
"""Tests that number of train/eval defaults to config values.""" """Tests that number of train/eval defaults to config values."""
...@@ -269,21 +267,17 @@ class ModelLibTest(tf.test.TestCase): ...@@ -269,21 +267,17 @@ class ModelLibTest(tf.test.TestCase):
hparams_overrides='load_pretrained=false') hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20 train_steps = 20
eval_steps = 10
train_and_eval_dict = model_lib.create_estimator_and_inputs( train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config, run_config,
hparams, hparams,
pipeline_config_path, pipeline_config_path,
train_steps=train_steps, train_steps=train_steps,
eval_steps=eval_steps,
use_tpu_estimator=True) use_tpu_estimator=True)
estimator = train_and_eval_dict['estimator'] estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps'] train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tpu_estimator.TPUEstimator) self.assertIsInstance(estimator, tpu_estimator.TPUEstimator)
self.assertEqual(20, train_steps) self.assertEqual(20, train_steps)
self.assertEqual(10, eval_steps)
def test_create_train_and_eval_specs(self): def test_create_train_and_eval_specs(self):
"""Tests that `TrainSpec` and `EvalSpec` is created correctly.""" """Tests that `TrainSpec` and `EvalSpec` is created correctly."""
...@@ -292,38 +286,32 @@ class ModelLibTest(tf.test.TestCase): ...@@ -292,38 +286,32 @@ class ModelLibTest(tf.test.TestCase):
hparams_overrides='load_pretrained=false') hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20 train_steps = 20
eval_steps = 10
eval_on_train_steps = 15
train_and_eval_dict = model_lib.create_estimator_and_inputs( train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config, run_config,
hparams, hparams,
pipeline_config_path, pipeline_config_path,
train_steps=train_steps, train_steps=train_steps)
eval_steps=eval_steps)
train_input_fn = train_and_eval_dict['train_input_fn'] train_input_fn = train_and_eval_dict['train_input_fn']
eval_input_fn = train_and_eval_dict['eval_input_fn'] eval_input_fns = train_and_eval_dict['eval_input_fns']
eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn'] eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
predict_input_fn = train_and_eval_dict['predict_input_fn'] predict_input_fn = train_and_eval_dict['predict_input_fn']
train_steps = train_and_eval_dict['train_steps'] train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
train_spec, eval_specs = model_lib.create_train_and_eval_specs( train_spec, eval_specs = model_lib.create_train_and_eval_specs(
train_input_fn, train_input_fn,
eval_input_fn, eval_input_fns,
eval_on_train_input_fn, eval_on_train_input_fn,
predict_input_fn, predict_input_fn,
train_steps, train_steps,
eval_steps,
eval_on_train_data=True, eval_on_train_data=True,
eval_on_train_steps=eval_on_train_steps,
final_exporter_name='exporter', final_exporter_name='exporter',
eval_spec_name='holdout') eval_spec_names=['holdout'])
self.assertEqual(train_steps, train_spec.max_steps) self.assertEqual(train_steps, train_spec.max_steps)
self.assertEqual(2, len(eval_specs)) self.assertEqual(2, len(eval_specs))
self.assertEqual(eval_steps, eval_specs[0].steps) self.assertEqual(None, eval_specs[0].steps)
self.assertEqual('holdout', eval_specs[0].name) self.assertEqual('holdout', eval_specs[0].name)
self.assertEqual('exporter', eval_specs[0].exporters[0].name) self.assertEqual('exporter_holdout', eval_specs[0].exporters[0].name)
self.assertEqual(eval_on_train_steps, eval_specs[1].steps) self.assertEqual(None, eval_specs[1].steps)
self.assertEqual('eval_on_train', eval_specs[1].name) self.assertEqual('eval_on_train', eval_specs[1].name)
def test_experiment(self): def test_experiment(self):
...@@ -339,7 +327,7 @@ class ModelLibTest(tf.test.TestCase): ...@@ -339,7 +327,7 @@ class ModelLibTest(tf.test.TestCase):
train_steps=10, train_steps=10,
eval_steps=20) eval_steps=20)
self.assertEqual(10, experiment.train_steps) self.assertEqual(10, experiment.train_steps)
self.assertEqual(20, experiment.eval_steps) self.assertEqual(None, experiment.eval_steps)
class UnbatchTensorsTest(tf.test.TestCase): class UnbatchTensorsTest(tf.test.TestCase):
......
...@@ -31,7 +31,16 @@ flags.DEFINE_string( ...@@ -31,7 +31,16 @@ flags.DEFINE_string(
flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config ' flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
'file.') 'file.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.') flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_integer('num_eval_steps', None, 'Number of train steps.') flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job. Note '
'that one can only use this in eval-only mode, and '
'`checkpoint_dir` must be supplied.')
flags.DEFINE_integer('sample_1_of_n_eval_examples', 1, 'Will sample one of '
'every n eval input examples, where n is provided.')
flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
'one of every n train input examples for evaluation, '
'where n is provided. This is only used if '
'`eval_training_data` is True.')
flags.DEFINE_string( flags.DEFINE_string(
'hparams_overrides', None, 'Hyperparameter overrides, ' 'hparams_overrides', None, 'Hyperparameter overrides, '
'represented as a string containing comma-separated ' 'represented as a string containing comma-separated '
...@@ -44,8 +53,6 @@ flags.DEFINE_boolean( ...@@ -44,8 +53,6 @@ flags.DEFINE_boolean(
'run_once', False, 'If running in eval-only mode, whether to run just ' 'run_once', False, 'If running in eval-only mode, whether to run just '
'one round of eval vs running continuously (default).' 'one round of eval vs running continuously (default).'
) )
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
...@@ -59,14 +66,15 @@ def main(unused_argv): ...@@ -59,14 +66,15 @@ def main(unused_argv):
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides), hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path, pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps, train_steps=FLAGS.num_train_steps,
eval_steps=FLAGS.num_eval_steps) sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
sample_1_of_n_eval_on_train_examples=(
FLAGS.sample_1_of_n_eval_on_train_examples))
estimator = train_and_eval_dict['estimator'] estimator = train_and_eval_dict['estimator']
train_input_fn = train_and_eval_dict['train_input_fn'] train_input_fn = train_and_eval_dict['train_input_fn']
eval_input_fn = train_and_eval_dict['eval_input_fn'] eval_input_fns = train_and_eval_dict['eval_input_fns']
eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn'] eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
predict_input_fn = train_and_eval_dict['predict_input_fn'] predict_input_fn = train_and_eval_dict['predict_input_fn']
train_steps = train_and_eval_dict['train_steps'] train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
if FLAGS.checkpoint_dir: if FLAGS.checkpoint_dir:
if FLAGS.eval_training_data: if FLAGS.eval_training_data:
...@@ -74,23 +82,23 @@ def main(unused_argv): ...@@ -74,23 +82,23 @@ def main(unused_argv):
input_fn = eval_on_train_input_fn input_fn = eval_on_train_input_fn
else: else:
name = 'validation_data' name = 'validation_data'
input_fn = eval_input_fn # The first eval input will be evaluated.
input_fn = eval_input_fns[0]
if FLAGS.run_once: if FLAGS.run_once:
estimator.evaluate(input_fn, estimator.evaluate(input_fn,
eval_steps, steps=None,
checkpoint_path=tf.train.latest_checkpoint( checkpoint_path=tf.train.latest_checkpoint(
FLAGS.checkpoint_dir)) FLAGS.checkpoint_dir))
else: else:
model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn, model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
eval_steps, train_steps, name) train_steps, name)
else: else:
train_spec, eval_specs = model_lib.create_train_and_eval_specs( train_spec, eval_specs = model_lib.create_train_and_eval_specs(
train_input_fn, train_input_fn,
eval_input_fn, eval_input_fns,
eval_on_train_input_fn, eval_on_train_input_fn,
predict_input_fn, predict_input_fn,
train_steps, train_steps,
eval_steps,
eval_on_train_data=False) eval_on_train_data=False)
# Currently only a single Eval Spec is allowed. # Currently only a single Eval Spec is allowed.
......
...@@ -62,15 +62,20 @@ flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If ' ...@@ -62,15 +62,20 @@ flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
flags.DEFINE_string( flags.DEFINE_string(
'hparams_overrides', None, 'Comma-separated list of ' 'hparams_overrides', None, 'Comma-separated list of '
'hyperparameters to override defaults.') 'hyperparameters to override defaults.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_boolean('eval_training_data', False, flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.') 'If training data should be evaluated for this job.')
flags.DEFINE_integer('sample_1_of_n_eval_examples', 1, 'Will sample one of '
'every n eval input examples, where n is provided.')
flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
'one of every n train input examples for evaluation, '
'where n is provided. This is only used if '
'`eval_training_data` is True.')
flags.DEFINE_string( flags.DEFINE_string(
'model_dir', None, 'Path to output model directory ' 'model_dir', None, 'Path to output model directory '
'where event and checkpoint files will be written.') 'where event and checkpoint files will be written.')
flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config ' flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
'file.') 'file.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_integer('num_eval_steps', None, 'Number of train steps.')
FLAGS = tf.flags.FLAGS FLAGS = tf.flags.FLAGS
...@@ -103,17 +108,18 @@ def main(unused_argv): ...@@ -103,17 +108,18 @@ def main(unused_argv):
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides), hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path, pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps, train_steps=FLAGS.num_train_steps,
eval_steps=FLAGS.num_eval_steps, sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
sample_1_of_n_eval_on_train_examples=(
FLAGS.sample_1_of_n_eval_on_train_examples),
use_tpu_estimator=True, use_tpu_estimator=True,
use_tpu=FLAGS.use_tpu, use_tpu=FLAGS.use_tpu,
num_shards=FLAGS.num_shards, num_shards=FLAGS.num_shards,
**kwargs) **kwargs)
estimator = train_and_eval_dict['estimator'] estimator = train_and_eval_dict['estimator']
train_input_fn = train_and_eval_dict['train_input_fn'] train_input_fn = train_and_eval_dict['train_input_fn']
eval_input_fn = train_and_eval_dict['eval_input_fn'] eval_input_fns = train_and_eval_dict['eval_input_fns']
eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn'] eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
train_steps = train_and_eval_dict['train_steps'] train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
if FLAGS.mode == 'train': if FLAGS.mode == 'train':
estimator.train(input_fn=train_input_fn, max_steps=train_steps) estimator.train(input_fn=train_input_fn, max_steps=train_steps)
...@@ -125,9 +131,10 @@ def main(unused_argv): ...@@ -125,9 +131,10 @@ def main(unused_argv):
input_fn = eval_on_train_input_fn input_fn = eval_on_train_input_fn
else: else:
name = 'validation_data' name = 'validation_data'
input_fn = eval_input_fn # Currently only a single eval input is allowed.
model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn, eval_steps, input_fn = eval_input_fns[0]
train_steps, name) model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn, train_steps,
name)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -24,6 +24,7 @@ Feature map generators build on the base feature extractors and produce a list ...@@ -24,6 +24,7 @@ Feature map generators build on the base feature extractors and produce a list
of final feature maps. of final feature maps.
""" """
import collections import collections
import functools
import tensorflow as tf import tensorflow as tf
from object_detection.utils import ops from object_detection.utils import ops
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -45,6 +46,222 @@ def get_depth_fn(depth_multiplier, min_depth): ...@@ -45,6 +46,222 @@ def get_depth_fn(depth_multiplier, min_depth):
return multiply_depth return multiply_depth
class KerasMultiResolutionFeatureMaps(tf.keras.Model):
"""Generates multi resolution feature maps from input image features.
A Keras model that generates multi-scale feature maps for detection as in the
SSD papers by Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1.
More specifically, when called on inputs it performs the following two tasks:
1) If a layer name is provided in the configuration, returns that layer as a
feature map.
2) If a layer name is left as an empty string, constructs a new feature map
based on the spatial shape and depth configuration. Note that the current
implementation only supports generating new layers using convolution of
stride 2 resulting in a spatial resolution reduction by a factor of 2.
By default convolution kernel size is set to 3, and it can be customized
by the caller.
An example of the configuration for Inception V3:
{
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128]
}
When this feature generator object is called on input image_features:
Args:
image_features: A dictionary of handles to activation tensors from the
base feature extractor.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
"""
def __init__(self,
feature_map_layout,
depth_multiplier,
min_depth,
insert_1x1_conv,
is_training,
conv_hyperparams,
freeze_batchnorm,
name=None):
"""Constructor.
Args:
feature_map_layout: Dictionary of specifications for the feature map
layouts in the following format (Inception V2/V3 respectively):
{
'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128]
}
or
{
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128]
}
If 'from_layer' is specified, the specified feature map is directly used
as a box predictor layer, and the layer_depth is directly inferred from
the feature map (instead of using the provided 'layer_depth' parameter).
In this case, our convention is to set 'layer_depth' to -1 for clarity.
Otherwise, if 'from_layer' is an empty string, then the box predictor
layer will be built from the previous layer using convolution
operations. Note that the current implementation only supports
generating new layers using convolutions of stride 2 (resulting in a
spatial resolution reduction by a factor of 2), and will be extended to
a more flexible design. Convolution kernel size is set to 3 by default,
and can be customized by the 'conv_kernel_size' parameter (similarly,
'conv_kernel_size' should be set to -1 if 'from_layer' is specified).
The created convolution operation will be a normal 2D convolution by
default, and a depthwise convolution followed by 1x1 convolution if
'use_depthwise' is set to True.
depth_multiplier: Depth multiplier for convolutional layers.
min_depth: Minimum depth for convolutional layers.
insert_1x1_conv: A boolean indicating whether an additional 1x1
convolution should be inserted before shrinking the feature map.
is_training: Indicates whether the feature generator is in training mode.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops.
freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
super(KerasMultiResolutionFeatureMaps, self).__init__(name=name)
self.feature_map_layout = feature_map_layout
self.convolutions = []
depth_fn = get_depth_fn(depth_multiplier, min_depth)
base_from_layer = ''
use_explicit_padding = False
if 'use_explicit_padding' in feature_map_layout:
use_explicit_padding = feature_map_layout['use_explicit_padding']
use_depthwise = False
if 'use_depthwise' in feature_map_layout:
use_depthwise = feature_map_layout['use_depthwise']
for index, from_layer in enumerate(feature_map_layout['from_layer']):
net = []
self.convolutions.append(net)
layer_depth = feature_map_layout['layer_depth'][index]
conv_kernel_size = 3
if 'conv_kernel_size' in feature_map_layout:
conv_kernel_size = feature_map_layout['conv_kernel_size'][index]
if from_layer:
base_from_layer = from_layer
else:
if insert_1x1_conv:
layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
base_from_layer, index, depth_fn(layer_depth / 2))
net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth / 2),
[1, 1],
padding='SAME',
strides=1,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_batchnorm'))
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name))
layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
base_from_layer, index, conv_kernel_size, conv_kernel_size,
depth_fn(layer_depth))
stride = 2
padding = 'SAME'
if use_explicit_padding:
padding = 'VALID'
# We define this function here while capturing the value of
# conv_kernel_size, to avoid holding a reference to the loop variable
# conv_kernel_size inside of a lambda function
def fixed_padding(features, kernel_size=conv_kernel_size):
return ops.fixed_padding(features, kernel_size)
net.append(tf.keras.layers.Lambda(fixed_padding))
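# Illustrative aside (not from the original diff): binding the kernel size
# through a default argument avoids Python's late binding of closures. A
# `lambda f: ops.fixed_padding(f, conv_kernel_size)` would read
# conv_kernel_size only when the Lambda layer is called, so every layer would
# see the value from the final loop iteration; the default-argument form
# freezes the current value at definition time.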
# TODO(rathodv): Add some utilities to simplify the creation of
# Depthwise & non-depthwise convolutions w/ normalization & activations
if use_depthwise:
net.append(tf.keras.layers.DepthwiseConv2D(
[conv_kernel_size, conv_kernel_size],
depth_multiplier=1,
padding=padding,
strides=stride,
name=layer_name + '_depthwise_conv',
**conv_hyperparams.params()))
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_depthwise_batchnorm'))
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name + '_depthwise'))
net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth), [1, 1],
padding='SAME',
strides=1,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_batchnorm'))
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name))
else:
net.append(tf.keras.layers.Conv2D(
depth_fn(layer_depth),
[conv_kernel_size, conv_kernel_size],
padding=padding,
strides=stride,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_batchnorm'))
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name))
def call(self, image_features):
"""Generate the multi-resolution feature maps.
Executed when calling the `.__call__` method on input.
Args:
image_features: A dictionary of handles to activation tensors from the
base feature extractor.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
"""
feature_maps = []
feature_map_keys = []
for index, from_layer in enumerate(self.feature_map_layout['from_layer']):
if from_layer:
feature_map = image_features[from_layer]
feature_map_keys.append(from_layer)
else:
feature_map = feature_maps[-1]
for layer in self.convolutions[index]:
feature_map = layer(feature_map)
layer_name = self.convolutions[index][-1].name
feature_map_keys.append(layer_name)
feature_maps.append(feature_map)
return collections.OrderedDict(
[(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
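# A minimal usage sketch (not part of the original diff); `conv_hyperparams`
# is assumed to be a hyperparams_builder.KerasLayerHyperparams instance and
# `image_features` a dict of named activation tensors, as in the tests below.
feature_map_generator = KerasMultiResolutionFeatureMaps(
    feature_map_layout={
        'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
        'layer_depth': [-1, -1, -1, 512, 256, 128],
    },
    depth_multiplier=1,
    min_depth=32,
    insert_1x1_conv=True,
    is_training=True,
    conv_hyperparams=conv_hyperparams,
    freeze_batchnorm=False,
    name='FeatureMaps')
feature_maps = feature_map_generator(image_features)  # OrderedDict of feature maps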
def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
min_depth, insert_1x1_conv, image_features): min_depth, insert_1x1_conv, image_features):
"""Generates multi resolution feature maps from input image features. """Generates multi resolution feature maps from input image features.
...@@ -77,7 +294,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, ...@@ -77,7 +294,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
} }
or or
{ {
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', '', ''], 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128] 'layer_depth': [-1, -1, -1, 512, 256, 128]
} }
If 'from_layer' is specified, the specified feature map is directly used If 'from_layer' is specified, the specified feature map is directly used
...@@ -179,7 +396,10 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, ...@@ -179,7 +396,10 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
[(x, y) for (x, y) in zip(feature_map_keys, feature_maps)]) [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
def fpn_top_down_feature_maps(image_features, depth, scope=None): def fpn_top_down_feature_maps(image_features,
depth,
use_depthwise=False,
scope=None):
"""Generates `top-down` feature maps for Feature Pyramid Networks. """Generates `top-down` feature maps for Feature Pyramid Networks.
See https://arxiv.org/abs/1612.03144 for details. See https://arxiv.org/abs/1612.03144 for details.
...@@ -189,6 +409,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None): ...@@ -189,6 +409,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
Spatial resolutions of successive tensors must reduce exactly by a factor Spatial resolutions of successive tensors must reduce exactly by a factor
of 2. of 2.
depth: depth of output feature maps. depth: depth of output feature maps.
use_depthwise: use depthwise separable conv instead of regular conv.
scope: A scope name to wrap this op under. scope: A scope name to wrap this op under.
Returns: Returns:
...@@ -200,7 +421,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None): ...@@ -200,7 +421,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
output_feature_maps_list = [] output_feature_maps_list = []
output_feature_map_keys = [] output_feature_map_keys = []
with slim.arg_scope( with slim.arg_scope(
[slim.conv2d], padding='SAME', stride=1): [slim.conv2d, slim.separable_conv2d], padding='SAME', stride=1):
top_down = slim.conv2d( top_down = slim.conv2d(
image_features[-1][1], image_features[-1][1],
depth, [1, 1], activation_fn=None, normalizer_fn=None, depth, [1, 1], activation_fn=None, normalizer_fn=None,
...@@ -216,7 +437,11 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None): ...@@ -216,7 +437,11 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
activation_fn=None, normalizer_fn=None, activation_fn=None, normalizer_fn=None,
scope='projection_%d' % (level + 1)) scope='projection_%d' % (level + 1))
top_down += residual top_down += residual
output_feature_maps_list.append(slim.conv2d( if use_depthwise:
conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1)
else:
conv_op = slim.conv2d
output_feature_maps_list.append(conv_op(
top_down, top_down,
depth, [3, 3], depth, [3, 3],
scope='smoothing_%d' % (level + 1))) scope='smoothing_%d' % (level + 1)))
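# Illustrative usage (not from the original diff): with use_depthwise=True the
# 3x3 smoothing convolutions become depthwise-separable convolutions, e.g.
#   fpn_top_down_feature_maps(
#       image_features=[('block2', c2), ('block3', c3)],  # hypothetical tensors
#       depth=128, use_depthwise=True)
# which is exercised by test_get_expected_feature_map_shapes_with_depthwise
# further below.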
...@@ -226,7 +451,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None): ...@@ -226,7 +451,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers, def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
image_features): image_features, replace_pool_with_conv=False):
"""Generates pooling pyramid feature maps. """Generates pooling pyramid feature maps.
The pooling pyramid feature maps is motivated by The pooling pyramid feature maps is motivated by
...@@ -250,6 +475,8 @@ def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers, ...@@ -250,6 +475,8 @@ def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
from the base feature. from the base feature.
image_features: A dictionary of handles to activation tensors from the image_features: A dictionary of handles to activation tensors from the
feature extractor. feature extractor.
replace_pool_with_conv: Whether or not to replace pooling operations with
convolutions in the PPN. Default is False.
Returns: Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to feature_maps: an OrderedDict mapping keys (feature map names) to
...@@ -279,12 +506,22 @@ def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers, ...@@ -279,12 +506,22 @@ def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
feature_map_keys.append(feature_map_key) feature_map_keys.append(feature_map_key)
feature_maps.append(image_features) feature_maps.append(image_features)
feature_map = image_features feature_map = image_features
with slim.arg_scope([slim.max_pool2d], padding='SAME', stride=2): if replace_pool_with_conv:
for i in range(num_layers - 1): with slim.arg_scope([slim.conv2d], padding='SAME', stride=2):
feature_map_key = 'MaxPool2d_%d_2x2' % i for i in range(num_layers - 1):
feature_map = slim.max_pool2d( feature_map_key = 'Conv2d_{}_3x3_s2_{}'.format(i,
feature_map, [2, 2], padding='SAME', scope=feature_map_key) base_feature_map_depth)
feature_map_keys.append(feature_map_key) feature_map = slim.conv2d(
feature_maps.append(feature_map) feature_map, base_feature_map_depth, [3, 3], scope=feature_map_key)
feature_map_keys.append(feature_map_key)
feature_maps.append(feature_map)
else:
with slim.arg_scope([slim.max_pool2d], padding='SAME', stride=2):
for i in range(num_layers - 1):
feature_map_key = 'MaxPool2d_%d_2x2' % i
feature_map = slim.max_pool2d(
feature_map, [2, 2], padding='SAME', scope=feature_map_key)
feature_map_keys.append(feature_map_key)
feature_maps.append(feature_map)
return collections.OrderedDict( return collections.OrderedDict(
[(x, y) for (x, y) in zip(feature_map_keys, feature_maps)]) [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
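# A minimal usage sketch (not part of the original diff). The generator is
# assumed here to take a dictionary with a single base feature handle; the
# key, tensor and depths are hypothetical.
ppn_feature_maps = pooling_pyramid_feature_maps(
    base_feature_map_depth=1024,
    num_layers=6,
    image_features={'image_features': base_feature_map},
    replace_pool_with_conv=True)  # stride-2 3x3 convs instead of max pooling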
...@@ -15,9 +15,15 @@ ...@@ -15,9 +15,15 @@
"""Tests for feature map generators.""" """Tests for feature map generators."""
from absl.testing import parameterized
import tensorflow as tf import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.protos import hyperparams_pb2
INCEPTION_V2_LAYOUT = { INCEPTION_V2_LAYOUT = {
'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''], 'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
...@@ -40,21 +46,60 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = { ...@@ -40,21 +46,60 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
} }
# TODO(rathodv): add tests with different anchor strides. @parameterized.parameters(
{'use_keras': False},
{'use_keras': True},
)
class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes_with_inception_v2(self): def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _build_feature_map_generator(self, feature_map_layout, use_keras):
if use_keras:
return feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=feature_map_layout,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
freeze_batchnorm=False,
is_training=True,
conv_hyperparams=self._build_conv_hyperparams(),
name='FeatureMaps'
)
else:
def feature_map_generator(image_features):
return feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
return feature_map_generator
def test_get_expected_feature_map_shapes_with_inception_v2(self, use_keras):
image_features = { image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32), 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32), 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32) 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
} }
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_generator = self._build_feature_map_generator(
feature_map_layout=INCEPTION_V2_LAYOUT, feature_map_layout=INCEPTION_V2_LAYOUT,
depth_multiplier=1, use_keras=use_keras
min_depth=32, )
insert_1x1_conv=True, feature_maps = feature_map_generator(image_features)
image_features=image_features)
expected_feature_map_shapes = { expected_feature_map_shapes = {
'Mixed_3c': (4, 28, 28, 256), 'Mixed_3c': (4, 28, 28, 256),
...@@ -70,21 +115,53 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): ...@@ -70,21 +115,53 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
out_feature_maps = sess.run(feature_maps) out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict( out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items()) (key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_get_expected_feature_map_shapes_use_explicit_padding(
self, use_keras):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
}
layout_copy = INCEPTION_V2_LAYOUT.copy()
layout_copy['use_explicit_padding'] = True
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=layout_copy,
use_keras=use_keras
)
feature_maps = feature_map_generator(image_features)
expected_feature_map_shapes = {
'Mixed_3c': (4, 28, 28, 256),
'Mixed_4c': (4, 14, 14, 576),
'Mixed_5c': (4, 7, 7, 1024),
'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_inception_v3(self): def test_get_expected_feature_map_shapes_with_inception_v3(self, use_keras):
image_features = { image_features = {
'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32), 'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32), 'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32) 'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
} }
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_generator = self._build_feature_map_generator(
feature_map_layout=INCEPTION_V3_LAYOUT, feature_map_layout=INCEPTION_V3_LAYOUT,
depth_multiplier=1, use_keras=use_keras
min_depth=32, )
insert_1x1_conv=True, feature_maps = feature_map_generator(image_features)
image_features=image_features)
expected_feature_map_shapes = { expected_feature_map_shapes = {
'Mixed_5d': (4, 35, 35, 256), 'Mixed_5d': (4, 35, 35, 256),
...@@ -100,10 +177,10 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): ...@@ -100,10 +177,10 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
out_feature_maps = sess.run(feature_maps) out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict( out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items()) (key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1( def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
self): self, use_keras):
image_features = { image_features = {
'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512], 'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512],
dtype=tf.float32), dtype=tf.float32),
...@@ -111,12 +188,11 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): ...@@ -111,12 +188,11 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
dtype=tf.float32), dtype=tf.float32),
} }
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_generator = self._build_feature_map_generator(
feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT, feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
depth_multiplier=1, use_keras=use_keras
min_depth=32, )
insert_1x1_conv=True, feature_maps = feature_map_generator(image_features)
image_features=image_features)
expected_feature_map_shapes = { expected_feature_map_shapes = {
'Conv2d_11_pointwise': (4, 16, 16, 512), 'Conv2d_11_pointwise': (4, 16, 16, 512),
...@@ -131,7 +207,62 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): ...@@ -131,7 +207,62 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
out_feature_maps = sess.run(feature_maps) out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict( out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items()) (key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_variable_names_with_inception_v2(self, use_keras):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
}
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=INCEPTION_V2_LAYOUT,
use_keras=use_keras
)
feature_maps = feature_map_generator(image_features)
expected_slim_variables = set([
'Mixed_5c_1_Conv2d_3_1x1_256/weights',
'Mixed_5c_1_Conv2d_3_1x1_256/biases',
'Mixed_5c_2_Conv2d_3_3x3_s2_512/weights',
'Mixed_5c_2_Conv2d_3_3x3_s2_512/biases',
'Mixed_5c_1_Conv2d_4_1x1_128/weights',
'Mixed_5c_1_Conv2d_4_1x1_128/biases',
'Mixed_5c_2_Conv2d_4_3x3_s2_256/weights',
'Mixed_5c_2_Conv2d_4_3x3_s2_256/biases',
'Mixed_5c_1_Conv2d_5_1x1_128/weights',
'Mixed_5c_1_Conv2d_5_1x1_128/biases',
'Mixed_5c_2_Conv2d_5_3x3_s2_256/weights',
'Mixed_5c_2_Conv2d_5_3x3_s2_256/biases',
])
expected_keras_variables = set([
'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
])
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
sess.run(feature_maps)
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
if use_keras:
self.assertSetEqual(expected_keras_variables, actual_variable_set)
else:
self.assertSetEqual(expected_slim_variables, actual_variable_set)
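# The two expected sets above differ only in naming convention: the slim
# generator appears to create '<layer>/weights' and '<layer>/biases' variables
# directly, while the Keras generator nests its layers under a 'FeatureMaps'
# name scope and uses the Keras 'kernel'/'bias' variable names.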
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
class FPNFeatureMapGeneratorTest(tf.test.TestCase): class FPNFeatureMapGeneratorTest(tf.test.TestCase):
...@@ -161,6 +292,31 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase): ...@@ -161,6 +292,31 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase):
for key, value in out_feature_maps.items()} for key, value in out_feature_maps.items()}
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_depthwise(self):
image_features = [
('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
]
feature_maps = feature_map_generators.fpn_top_down_feature_maps(
image_features=image_features, depth=128, use_depthwise=True)
expected_feature_map_shapes = {
'top_down_block2': (4, 8, 8, 128),
'top_down_block3': (4, 4, 4, 128),
'top_down_block4': (4, 2, 2, 128),
'top_down_block5': (4, 1, 1, 128)
}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = {key: value.shape
for key, value in out_feature_maps.items()}
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
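# A rough reading of the shapes above: fpn_top_down_feature_maps presumably
# projects every input block to `depth` channels (128 here) and merges them
# top-down, so each 'top_down_<block>' output keeps the spatial size of its
# input block while the channel count becomes `depth`; use_depthwise only
# swaps the convolutions for depthwise-separable ones.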
class GetDepthFunctionTest(tf.test.TestCase): class GetDepthFunctionTest(tf.test.TestCase):
...@@ -175,5 +331,94 @@ class GetDepthFunctionTest(tf.test.TestCase): ...@@ -175,5 +331,94 @@ class GetDepthFunctionTest(tf.test.TestCase):
self.assertEqual(depth_fn(64), 32) self.assertEqual(depth_fn(64), 32)
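# A plausible sketch of what get_depth_fn returns (not shown in this hunk):
# something along the lines of
#   depth_fn = lambda d: max(int(d * depth_multiplier), min_depth)
# e.g. depth_multiplier=0.5 with min_depth=32 gives depth_fn(64) == 32, which
# matches the assertion above and the depth_fn(...) calls in the FPN extractor
# changes further down.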
@parameterized.parameters(
{'replace_pool_with_conv': False},
{'replace_pool_with_conv': True},
)
class PoolingPyramidFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes(self, replace_pool_with_conv):
image_features = {
'image_features': tf.random_uniform([4, 19, 19, 1024])
}
feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
base_feature_map_depth=1024,
num_layers=6,
image_features=image_features,
replace_pool_with_conv=replace_pool_with_conv)
expected_pool_feature_map_shapes = {
'Base_Conv2d_1x1_1024': (4, 19, 19, 1024),
'MaxPool2d_0_2x2': (4, 10, 10, 1024),
'MaxPool2d_1_2x2': (4, 5, 5, 1024),
'MaxPool2d_2_2x2': (4, 3, 3, 1024),
'MaxPool2d_3_2x2': (4, 2, 2, 1024),
'MaxPool2d_4_2x2': (4, 1, 1, 1024),
}
expected_conv_feature_map_shapes = {
'Base_Conv2d_1x1_1024': (4, 19, 19, 1024),
'Conv2d_0_3x3_s2_1024': (4, 10, 10, 1024),
'Conv2d_1_3x3_s2_1024': (4, 5, 5, 1024),
'Conv2d_2_3x3_s2_1024': (4, 3, 3, 1024),
'Conv2d_3_3x3_s2_1024': (4, 2, 2, 1024),
'Conv2d_4_3x3_s2_1024': (4, 1, 1, 1024),
}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = {key: value.shape
for key, value in out_feature_maps.items()}
if replace_pool_with_conv:
self.assertDictEqual(expected_conv_feature_map_shapes,
out_feature_map_shapes)
else:
self.assertDictEqual(expected_pool_feature_map_shapes,
out_feature_map_shapes)
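# Spatial-size check for the expected shapes above: every pooling or stride-2
# convolution stage halves height/width with SAME padding (ceiling division),
# so the 19x19 base map shrinks as 19 -> 10 -> 5 -> 3 -> 2 -> 1 across the
# five generated layers, regardless of replace_pool_with_conv.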
def test_get_expected_variable_names(self, replace_pool_with_conv):
image_features = {
'image_features': tf.random_uniform([4, 19, 19, 1024])
}
feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
base_feature_map_depth=1024,
num_layers=6,
image_features=image_features,
replace_pool_with_conv=replace_pool_with_conv)
expected_pool_variables = set([
'Base_Conv2d_1x1_1024/weights',
'Base_Conv2d_1x1_1024/biases',
])
expected_conv_variables = set([
'Base_Conv2d_1x1_1024/weights',
'Base_Conv2d_1x1_1024/biases',
'Conv2d_0_3x3_s2_1024/weights',
'Conv2d_0_3x3_s2_1024/biases',
'Conv2d_1_3x3_s2_1024/weights',
'Conv2d_1_3x3_s2_1024/biases',
'Conv2d_2_3x3_s2_1024/weights',
'Conv2d_2_3x3_s2_1024/biases',
'Conv2d_3_3x3_s2_1024/weights',
'Conv2d_3_3x3_s2_1024/biases',
'Conv2d_4_3x3_s2_1024/weights',
'Conv2d_4_3x3_s2_1024/biases',
])
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
sess.run(feature_maps)
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
if replace_pool_with_conv:
self.assertSetEqual(expected_conv_variables, actual_variable_set)
else:
self.assertSetEqual(expected_pool_variables, actual_variable_set)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A wrapper around the MobileNet v2 models for Keras, for object detection."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from object_detection.core import freezable_batch_norm
from object_detection.utils import ops
# pylint: disable=invalid-name
# This method copied from the slim mobilenet base network code (same license)
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
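# Worked example of the rounding rule above: _make_divisible(32 * 0.35, 8)
# first rounds 11.2 to the nearest multiple of 8 that is at least 8, giving 8;
# since 8 < 0.9 * 11.2 it is bumped up by one divisor, so the call returns 16.
# Channel counts therefore stay divisible by 8 without shrinking by much more
# than 10%.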
class _LayersOverride(object):
"""Alternative Keras layers interface for the Keras MobileNetV2."""
def __init__(self,
batchnorm_training,
default_batchnorm_momentum=0.999,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None):
"""Alternative tf.keras.layers interface, for use by the Keras MobileNetV2.
It is used by the Keras applications kwargs injection API to
modify the Mobilenet v2 Keras application with changes required by
the Object Detection API.
These injected interfaces make the following changes to the network:
- Applies the Object Detection hyperparameter configuration
- Supports FreezableBatchNorms
- Adds support for a min number of filters for each layer
- Makes the `alpha` parameter affect the final convolution block even if it
is less than 1.0
- Adds support for explicit padding of convolutions
Args:
batchnorm_training: Bool. Assigned to Batch norm layer `training` param
when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
batch norm layers will be constructed using this value as the momentum.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops. Optionally set to `None`
to use default mobilenet_v2 layer builders.
use_explicit_padding: If True, use 'valid' padding for convolutions,
but explicitly pre-pads inputs so that the output dimensions are the
same as if 'same' padding were used. Off by default.
alpha: The width multiplier referenced in the MobileNetV2 paper. It
modifies the number of filters in each convolutional layer.
min_depth: Minimum number of filters in the convolutional layers.
"""
self._alpha = alpha
self._batchnorm_training = batchnorm_training
self._default_batchnorm_momentum = default_batchnorm_momentum
self._conv_hyperparams = conv_hyperparams
self._use_explicit_padding = use_explicit_padding
self._min_depth = min_depth
def _FixedPaddingLayer(self, kernel_size):
return tf.keras.layers.Lambda(lambda x: ops.fixed_padding(x, kernel_size))
def Conv2D(self, filters, **kwargs):
"""Builds a Conv2D layer according to the current Object Detection config.
Overrides the Keras MobileNetV2 application's convolutions with ones that
follow the spec specified by the Object Detection hyperparameters.
Args:
filters: The number of filters to use for the convolution.
**kwargs: Keyword args specified by the Keras application for
constructing the convolution.
Returns:
A one-arg callable that will either directly apply a Keras Conv2D layer to
the input argument, or that will first pad the input then apply a Conv2D
layer.
"""
# Make sure 'alpha' is always applied to the last convolution block's size
# (This overrides the Keras application's functionality)
if kwargs.get('name') == 'Conv_1' and self._alpha < 1.0:
filters = _make_divisible(1280 * self._alpha, 8)
# Apply the minimum depth to the convolution layers
if (self._min_depth and (filters < self._min_depth)
and not kwargs.get('name').endswith('expand')):
filters = self._min_depth
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
if self._use_explicit_padding and kernel_size > 1:
kwargs['padding'] = 'valid'
def padded_conv(features):
padded_features = self._FixedPaddingLayer(kernel_size)(features)
return tf.keras.layers.Conv2D(filters, **kwargs)(padded_features)
return padded_conv
else:
return tf.keras.layers.Conv2D(filters, **kwargs)
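# ops.fixed_padding (wrapped by _FixedPaddingLayer above) is assumed to pad
# height and width by a total of kernel_size - 1, split between the two sides,
# so that the subsequent 'valid' convolution yields the same output size that
# 'same' padding would have produced.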
def DepthwiseConv2D(self, **kwargs):
"""Builds a DepthwiseConv2D according to the Object Detection config.
Overrides the Keras MobileNetV2 application's convolutions with ones that
follow the spec specified by the Object Detection hyperparameters.
Args:
**kwargs: Keyword args specified by the Keras application for
constructing the convolution.
Returns:
A one-arg callable that will either directly apply a Keras DepthwiseConv2D
layer to the input argument, or that will first pad the input then apply
the depthwise convolution.
"""
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
if self._use_explicit_padding and kernel_size > 1:
kwargs['padding'] = 'valid'
def padded_depthwise_conv(features):
padded_features = self._FixedPaddingLayer(kernel_size)(features)
return tf.keras.layers.DepthwiseConv2D(**kwargs)(padded_features)
return padded_depthwise_conv
else:
return tf.keras.layers.DepthwiseConv2D(**kwargs)
def BatchNormalization(self, **kwargs):
"""Builds a normalization layer.
Overrides the Keras application batch norm with the norm specified by the
Object Detection configuration.
Args:
**kwargs: Only the name is used, all other params ignored.
Required for matching `layers.BatchNormalization` calls in the Keras
application.
Returns:
A normalization layer specified by the Object Detection hyperparameter
configurations.
"""
name = kwargs.get('name')
if self._conv_hyperparams:
return self._conv_hyperparams.build_batch_norm(
training=self._batchnorm_training,
name=name)
else:
return freezable_batch_norm.FreezableBatchNorm(
training=self._batchnorm_training,
epsilon=1e-3,
momentum=self._default_batchnorm_momentum,
name=name)
def Input(self, shape):
"""Builds an Input layer.
Overrides the Keras application Input layer with one that uses a
tf.placeholder_with_default instead of a tf.placeholder. This is necessary
to ensure the application works when run on a TPU.
Args:
shape: The shape for the input layer to use. (Does not include a dimension
for the batch size).
Returns:
An input layer for the specified shape that internally uses a
placeholder_with_default.
"""
default_size = 224
default_batch_size = 1
shape = list(shape)
default_shape = [default_size if dim is None else dim for dim in shape]
input_tensor = tf.constant(0.0, shape=[default_batch_size] + default_shape)
placeholder_with_default = tf.placeholder_with_default(
input=input_tensor, shape=[None] + shape)
return tf.keras.layers.Input(tensor=placeholder_with_default)
# pylint: disable=unused-argument
def ReLU(self, *args, **kwargs):
"""Builds an activation layer.
Overrides the Keras application ReLU with the activation specified by the
Object Detection configuration.
Args:
*args: Ignored, required to match the `tf.keras.ReLU` interface
**kwargs: Only the name is used,
required to match `tf.keras.ReLU` interface
Returns:
An activation layer specified by the Object Detection hyperparameter
configurations.
"""
name = kwargs.get('name')
if self._conv_hyperparams:
return self._conv_hyperparams.build_activation_layer(name=name)
else:
return tf.keras.layers.Lambda(tf.nn.relu6, name=name)
# pylint: enable=unused-argument
# pylint: disable=unused-argument
def ZeroPadding2D(self, **kwargs):
"""Replaces explicit padding in the Keras application with a no-op.
Args:
**kwargs: Ignored, required to match the Keras applications usage.
Returns:
A no-op identity lambda.
"""
return lambda x: x
# pylint: enable=unused-argument
# Forward all non-overridden methods to the keras layers
def __getattr__(self, item):
return getattr(tf.keras.layers, item)
def mobilenet_v2(batchnorm_training,
default_batchnorm_momentum=0.9997,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None,
**kwargs):
"""Instantiates the MobileNetV2 architecture, modified for object detection.
This wraps the MobileNetV2 tensorflow Keras application, but uses the
Keras application's kwargs-based monkey-patching API to override the Keras
architecture with the following changes:
- Changes the default batchnorm momentum to 0.9997
- Applies the Object Detection hyperparameter configuration
- Supports FreezableBatchNorms
- Adds support for a min number of filters for each layer
- Makes the `alpha` parameter affect the final convolution block even if it
is less than 1.0
- Adds support for explicit padding of convolutions
- Makes the Input layer use a tf.placeholder_with_default instead of a
tf.placeholder, to work on TPUs.
Args:
batchnorm_training: Bool. Assigned to Batch norm layer `training` param
when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
batch norm layers will be constructed using this value as the momentum.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops. Optionally set to `None`
to use default mobilenet_v2 layer builders.
use_explicit_padding: If True, use 'valid' padding for convolutions,
but explicitly pre-pads inputs so that the output dimensions are the
same as if 'same' padding were used. Off by default.
alpha: The width multiplier referenced in the MobileNetV2 paper. It
modifies the number of filters in each convolutional layer.
min_depth: Minimum number of filters in the convolutional layers.
**kwargs: Keyword arguments forwarded directly to the
`tf.keras.applications.MobilenetV2` method that constructs the Keras
model.
Returns:
A Keras model instance.
"""
layers_override = _LayersOverride(
batchnorm_training,
default_batchnorm_momentum=default_batchnorm_momentum,
conv_hyperparams=conv_hyperparams,
use_explicit_padding=use_explicit_padding,
min_depth=min_depth,
alpha=alpha)
return tf.keras.applications.MobileNetV2(alpha=alpha,
layers=layers_override,
**kwargs)
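# A minimal usage sketch, assuming the standard Keras application kwargs
# (weights, include_top) are forwarded as documented above; it mirrors what the
# tests below do:
#
#   model = mobilenet_v2(batchnorm_training=False, alpha=1.0,
#                        weights=None, include_top=False)
#   feature = model.get_layer(name='block_13_expand_relu').output
#   backbone = tf.keras.Model(inputs=model.inputs, outputs=[feature])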
# pylint: enable=invalid-name
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for mobilenet_v2."""
import itertools
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models.keras_applications import mobilenet_v2
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
_layers_to_check = [
'Conv1_relu',
'block_1_expand_relu', 'block_1_depthwise_relu', 'block_1_project_BN',
'block_2_expand_relu', 'block_2_depthwise_relu', 'block_2_project_BN',
'block_3_expand_relu', 'block_3_depthwise_relu', 'block_3_project_BN',
'block_4_expand_relu', 'block_4_depthwise_relu', 'block_4_project_BN',
'block_5_expand_relu', 'block_5_depthwise_relu', 'block_5_project_BN',
'block_6_expand_relu', 'block_6_depthwise_relu', 'block_6_project_BN',
'block_7_expand_relu', 'block_7_depthwise_relu', 'block_7_project_BN',
'block_8_expand_relu', 'block_8_depthwise_relu', 'block_8_project_BN',
'block_9_expand_relu', 'block_9_depthwise_relu', 'block_9_project_BN',
'block_10_expand_relu', 'block_10_depthwise_relu', 'block_10_project_BN',
'block_11_expand_relu', 'block_11_depthwise_relu', 'block_11_project_BN',
'block_12_expand_relu', 'block_12_depthwise_relu', 'block_12_project_BN',
'block_13_expand_relu', 'block_13_depthwise_relu', 'block_13_project_BN',
'block_14_expand_relu', 'block_14_depthwise_relu', 'block_14_project_BN',
'block_15_expand_relu', 'block_15_depthwise_relu', 'block_15_project_BN',
'block_16_expand_relu', 'block_16_depthwise_relu', 'block_16_project_BN',
'out_relu']
class MobilenetV2Test(test_case.TestCase):
def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
batch_norm {
train: true,
scale: false,
center: true,
decay: 0.2,
epsilon: 0.1,
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _create_application_with_layer_outputs(
self, layer_names, batchnorm_training,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None):
"""Constructs Keras mobilenetv2 that extracts intermediate layer outputs."""
if not layer_names:
layer_names = _layers_to_check
full_model = mobilenet_v2.mobilenet_v2(
batchnorm_training=batchnorm_training,
conv_hyperparams=conv_hyperparams,
weights=None,
use_explicit_padding=use_explicit_padding,
alpha=alpha,
min_depth=min_depth,
include_top=False)
layer_outputs = [full_model.get_layer(name=layer).output
for layer in layer_names]
return tf.keras.Model(
inputs=full_model.inputs,
outputs=layer_outputs)
def _check_returns_correct_shape(
self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes, use_explicit_padding=False, min_depth=None,
layer_names=None):
def graph_fn(image_tensor):
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=use_explicit_padding,
min_depth=min_depth,
alpha=depth_multiplier)
return model(image_tensor)
image_tensor = np.random.rand(batch_size, image_height, image_width,
3).astype(np.float32)
feature_maps = self.execute(graph_fn, [image_tensor])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def _check_returns_correct_shapes_with_dynamic_inputs(
self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes, use_explicit_padding=False,
layer_names=None):
def graph_fn(image_height, image_width):
image_tensor = tf.random_uniform([batch_size, image_height, image_width,
3], dtype=tf.float32)
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=use_explicit_padding,
alpha=depth_multiplier)
return model(image_tensor)
feature_maps = self.execute_cpu(graph_fn, [
np.array(image_height, dtype=np.int32),
np.array(image_width, dtype=np.int32)
])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def _get_variables(self, depth_multiplier, layer_names=None):
g = tf.Graph()
with g.as_default():
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=False,
alpha=depth_multiplier)
model(preprocessed_inputs)
return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
def test_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_128_explicit_padding(
self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape, use_explicit_padding=True)
def test_returns_correct_shapes_with_dynamic_inputs(
self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 150, 150, 32),
(2, 150, 150, 96),
(2, 75, 75, 96),
(2, 75, 75, 24),
(2, 75, 75, 144),
(2, 75, 75, 144),
(2, 75, 75, 24),
(2, 75, 75, 144),
(2, 38, 38, 144),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 19, 19, 192),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 19, 19, 576),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 19, 19, 576),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 10, 10, 576),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 320),
(2, 10, 10, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_enforcing_min_depth(
self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
expected_feature_map_shape = [(2, 150, 150, 32),
(2, 150, 150, 192),
(2, 75, 75, 192),
(2, 75, 75, 32),
(2, 75, 75, 192),
(2, 75, 75, 192),
(2, 75, 75, 32),
(2, 75, 75, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 32)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape, min_depth=32)
def test_hyperparam_override(self):
hyperparams = self._build_conv_hyperparams()
model = mobilenet_v2.mobilenet_v2(
batchnorm_training=True,
conv_hyperparams=hyperparams,
weights=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=32,
include_top=False)
hyperparams.params()
bn_layer = model.get_layer(name='block_5_project_BN')
self.assertAllClose(bn_layer.momentum, 0.2)
self.assertAllClose(bn_layer.epsilon, 0.1)
def test_variable_count(self):
depth_multiplier = 1
variables = self._get_variables(depth_multiplier)
self.assertEqual(len(variables), 260)
if __name__ == '__main__':
tf.test.main()
...@@ -21,18 +21,40 @@ import itertools ...@@ -21,18 +21,40 @@ import itertools
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case from object_detection.utils import test_case
class SsdFeatureExtractorTestBase(test_case.TestCase): class SsdFeatureExtractorTestBase(test_case.TestCase):
def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
batch_norm {
scale: false
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def conv_hyperparams_fn(self): def conv_hyperparams_fn(self):
with tf.contrib.slim.arg_scope([]) as sc: with tf.contrib.slim.arg_scope([]) as sc:
return sc return sc
@abstractmethod @abstractmethod
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False): use_explicit_padding=False, use_keras=False):
"""Constructs a new feature extractor. """Constructs a new feature extractor.
Args: Args:
...@@ -42,20 +64,42 @@ class SsdFeatureExtractorTestBase(test_case.TestCase): ...@@ -42,20 +64,42 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
use_explicit_padding: use 'VALID' padding for convolutions, but prepad use_explicit_padding: use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding inputs so that the output dimensions are the same as if 'SAME' padding
were used. were used.
use_keras: if True builds a keras-based feature extractor, if False builds
a slim-based one.
Returns: Returns:
an ssd_meta_arch.SSDFeatureExtractor object. an ssd_meta_arch.SSDFeatureExtractor or an
ssd_meta_arch.SSDKerasFeatureExtractor object.
""" """
pass pass
def check_extract_features_returns_correct_shape( def _extract_features(self, image_tensor, depth_multiplier, pad_to_multiple,
self, batch_size, image_height, image_width, depth_multiplier, use_explicit_padding=False, use_keras=False):
pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False): try:
def graph_fn(image_tensor): feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple,
use_explicit_padding,
use_keras=use_keras)
# If the unit test does not support a use_keras arg, it raises an error:
except TypeError:
feature_extractor = self._create_feature_extractor(depth_multiplier, feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple, pad_to_multiple,
use_explicit_padding) use_explicit_padding)
if use_keras:
feature_maps = feature_extractor(image_tensor)
else:
feature_maps = feature_extractor.extract_features(image_tensor) feature_maps = feature_extractor.extract_features(image_tensor)
return feature_maps return feature_maps
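# The try/except TypeError above is a compatibility shim: subclasses whose
# _create_feature_extractor has not yet grown a use_keras argument fall back to
# the old three-argument signature, and the extractor is then invoked either as
# a Keras layer (feature_extractor(image_tensor)) or via the slim-style
# extract_features() call.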
def check_extract_features_returns_correct_shape(
self, batch_size, image_height, image_width, depth_multiplier,
pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False,
use_keras=False):
def graph_fn(image_tensor):
return self._extract_features(image_tensor,
depth_multiplier,
pad_to_multiple,
use_explicit_padding,
use_keras=use_keras)
image_tensor = np.random.rand(batch_size, image_height, image_width, image_tensor = np.random.rand(batch_size, image_height, image_width,
3).astype(np.float32) 3).astype(np.float32)
...@@ -66,15 +110,16 @@ class SsdFeatureExtractorTestBase(test_case.TestCase): ...@@ -66,15 +110,16 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
def check_extract_features_returns_correct_shapes_with_dynamic_inputs( def check_extract_features_returns_correct_shapes_with_dynamic_inputs(
self, batch_size, image_height, image_width, depth_multiplier, self, batch_size, image_height, image_width, depth_multiplier,
pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False): pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False,
use_keras=False):
def graph_fn(image_height, image_width): def graph_fn(image_height, image_width):
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple,
use_explicit_padding)
image_tensor = tf.random_uniform([batch_size, image_height, image_width, image_tensor = tf.random_uniform([batch_size, image_height, image_width,
3], dtype=tf.float32) 3], dtype=tf.float32)
feature_maps = feature_extractor.extract_features(image_tensor) return self._extract_features(image_tensor,
return feature_maps depth_multiplier,
pad_to_multiple,
use_explicit_padding,
use_keras=use_keras)
feature_maps = self.execute_cpu(graph_fn, [ feature_maps = self.execute_cpu(graph_fn, [
np.array(image_height, dtype=np.int32), np.array(image_height, dtype=np.int32),
...@@ -85,11 +130,13 @@ class SsdFeatureExtractorTestBase(test_case.TestCase): ...@@ -85,11 +130,13 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
self.assertAllEqual(feature_map.shape, expected_shape) self.assertAllEqual(feature_map.shape, expected_shape)
def check_extract_features_raises_error_with_invalid_image_size( def check_extract_features_raises_error_with_invalid_image_size(
self, image_height, image_width, depth_multiplier, pad_to_multiple): self, image_height, image_width, depth_multiplier, pad_to_multiple,
feature_extractor = self._create_feature_extractor(depth_multiplier, use_keras=False):
pad_to_multiple)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
feature_maps = feature_extractor.extract_features(preprocessed_inputs) feature_maps = self._extract_features(preprocessed_inputs,
depth_multiplier,
pad_to_multiple,
use_keras=use_keras)
test_preprocessed_image = np.random.rand(4, image_height, image_width, 3) test_preprocessed_image = np.random.rand(4, image_height, image_width, 3)
with self.test_session() as sess: with self.test_session() as sess:
sess.run(tf.global_variables_initializer()) sess.run(tf.global_variables_initializer())
...@@ -98,13 +145,19 @@ class SsdFeatureExtractorTestBase(test_case.TestCase): ...@@ -98,13 +145,19 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
feed_dict={preprocessed_inputs: test_preprocessed_image}) feed_dict={preprocessed_inputs: test_preprocessed_image})
def check_feature_extractor_variables_under_scope( def check_feature_extractor_variables_under_scope(
self, depth_multiplier, pad_to_multiple, scope_name): self, depth_multiplier, pad_to_multiple, scope_name, use_keras=False):
variables = self.get_feature_extractor_variables(
depth_multiplier, pad_to_multiple, use_keras)
for variable in variables:
self.assertTrue(variable.name.startswith(scope_name))
def get_feature_extractor_variables(
self, depth_multiplier, pad_to_multiple, use_keras=False):
g = tf.Graph() g = tf.Graph()
with g.as_default(): with g.as_default():
feature_extractor = self._create_feature_extractor(
depth_multiplier, pad_to_multiple)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
feature_extractor.extract_features(preprocessed_inputs) self._extract_features(preprocessed_inputs,
variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) depth_multiplier,
for variable in variables: pad_to_multiple,
self.assertTrue(variable.name.startswith(scope_name)) use_keras=use_keras)
return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
"""SSD MobilenetV1 FPN Feature Extractor.""" """SSD MobilenetV1 FPN Feature Extractor."""
import copy
import functools
import tensorflow as tf import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
...@@ -27,6 +29,15 @@ from nets import mobilenet_v1 ...@@ -27,6 +29,15 @@ from nets import mobilenet_v1
slim = tf.contrib.slim slim = tf.contrib.slim
# A modified config of mobilenet v1 that makes it more detection friendly.
def _create_modified_mobilenet_config():
conv_defs = copy.copy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
conv_defs[-2] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=512)
conv_defs[-1] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=256)
return conv_defs
_CONV_DEFS = _create_modified_mobilenet_config()
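# The stock MOBILENETV1_CONV_DEFS are assumed to end in two depthwise-separable
# blocks of depth 1024; the override above keeps their strides (2, then 1) but
# narrows them to 512 and 256 channels, which is cheaper for the FPN top-down
# path that consumes these endpoints when use_depthwise is enabled.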
class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using MobilenetV1 FPN features.""" """SSD Feature Extractor using MobilenetV1 FPN features."""
...@@ -38,6 +49,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -38,6 +49,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
conv_hyperparams_fn, conv_hyperparams_fn,
fpn_min_level=3, fpn_min_level=3,
fpn_max_level=7, fpn_max_level=7,
additional_layer_depth=256,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
...@@ -63,6 +75,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -63,6 +75,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
maps in the backbone network, additional feature maps are created by maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of fpn applying stride 2 convolutions until we get the desired number of fpn
levels. levels.
additional_layer_depth: additional feature map layer channel depth.
reuse_weights: whether to reuse variables. Default is None. reuse_weights: whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
...@@ -84,6 +97,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -84,6 +97,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
override_base_feature_extractor_hyperparams) override_base_feature_extractor_hyperparams)
self._fpn_min_level = fpn_min_level self._fpn_min_level = fpn_min_level
self._fpn_max_level = fpn_max_level self._fpn_max_level = fpn_max_level
self._additional_layer_depth = additional_layer_depth
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -127,6 +141,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -127,6 +141,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
final_endpoint='Conv2d_13_pointwise', final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth, min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
conv_defs=_CONV_DEFS if self._use_depthwise else None,
use_explicit_padding=self._use_explicit_padding, use_explicit_padding=self._use_explicit_padding,
scope=scope) scope=scope)
...@@ -143,7 +158,8 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -143,7 +158,8 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_block_list.append(feature_blocks[level - 2]) feature_block_list.append(feature_blocks[level - 2])
fpn_features = feature_map_generators.fpn_top_down_feature_maps( fpn_features = feature_map_generators.fpn_top_down_feature_maps(
[(key, image_features[key]) for key in feature_block_list], [(key, image_features[key]) for key in feature_block_list],
depth=depth_fn(256)) depth=depth_fn(self._additional_layer_depth),
use_depthwise=self._use_depthwise)
feature_maps = [] feature_maps = []
for level in range(self._fpn_min_level, base_fpn_max_level + 1): for level in range(self._fpn_min_level, base_fpn_max_level + 1):
feature_maps.append(fpn_features['top_down_{}'.format( feature_maps.append(fpn_features['top_down_{}'.format(
...@@ -152,9 +168,14 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -152,9 +168,14 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_blocks[base_fpn_max_level - 2])] feature_blocks[base_fpn_max_level - 2])]
# Construct coarse features # Construct coarse features
for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1): for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
last_feature_map = slim.conv2d( if self._use_depthwise:
conv_op = functools.partial(
slim.separable_conv2d, depth_multiplier=1)
else:
conv_op = slim.conv2d
last_feature_map = conv_op(
last_feature_map, last_feature_map,
num_outputs=depth_fn(256), num_outputs=depth_fn(self._additional_layer_depth),
kernel_size=[3, 3], kernel_size=[3, 3],
stride=2, stride=2,
padding='SAME', padding='SAME',
......
...@@ -14,20 +14,27 @@ ...@@ -14,20 +14,27 @@
# ============================================================================== # ==============================================================================
"""Tests for ssd_mobilenet_v2_feature_extractor.""" """Tests for ssd_mobilenet_v2_feature_extractor."""
from absl.testing import parameterized
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v2_feature_extractor from object_detection.models import ssd_mobilenet_v2_feature_extractor
from object_detection.models import ssd_mobilenet_v2_keras_feature_extractor
slim = tf.contrib.slim slim = tf.contrib.slim
@parameterized.parameters(
{'use_keras': False},
{'use_keras': True},
)
class SsdMobilenetV2FeatureExtractorTest( class SsdMobilenetV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False): use_explicit_padding=False, use_keras=False):
"""Constructs a new feature extractor. """Constructs a new feature extractor.
Args: Args:
...@@ -37,19 +44,47 @@ class SsdMobilenetV2FeatureExtractorTest( ...@@ -37,19 +44,47 @@ class SsdMobilenetV2FeatureExtractorTest(
use_explicit_padding: use 'VALID' padding for convolutions, but prepad use_explicit_padding: use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding inputs so that the output dimensions are the same as if 'SAME' padding
were used. were used.
use_keras: if True builds a keras-based feature extractor, if False builds
a slim-based one.
Returns: Returns:
an ssd_meta_arch.SSDFeatureExtractor object. an ssd_meta_arch.SSDFeatureExtractor object.
""" """
min_depth = 32 min_depth = 32
return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor( if use_keras:
False, return (ssd_mobilenet_v2_keras_feature_extractor.
depth_multiplier, SSDMobileNetV2KerasFeatureExtractor(
min_depth, is_training=False,
pad_to_multiple, depth_multiplier=depth_multiplier,
self.conv_hyperparams_fn, min_depth=min_depth,
use_explicit_padding=use_explicit_padding) pad_to_multiple=pad_to_multiple,
conv_hyperparams=self._build_conv_hyperparams(),
def test_extract_features_returns_correct_shapes_128(self): freeze_batchnorm=False,
inplace_batchnorm_update=False,
use_explicit_padding=use_explicit_padding,
name='MobilenetV2'))
else:
return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor(
False,
depth_multiplier,
min_depth,
pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self, use_keras):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_keras=use_keras)
def test_extract_features_returns_correct_shapes_128_explicit_padding(
self, use_keras):
image_height = 128 image_height = 128
image_width = 128 image_width = 128
depth_multiplier = 1.0 depth_multiplier = 1.0
...@@ -59,9 +94,11 @@ class SsdMobilenetV2FeatureExtractorTest( ...@@ -59,9 +94,11 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 1, 1, 256), (2, 1, 1, 128)] (2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self): def test_extract_features_returns_correct_shapes_with_dynamic_inputs(
self, use_keras):
image_height = 128 image_height = 128
image_width = 128 image_width = 128
depth_multiplier = 1.0 depth_multiplier = 1.0
...@@ -71,9 +108,9 @@ class SsdMobilenetV2FeatureExtractorTest( ...@@ -71,9 +108,9 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 1, 1, 256), (2, 1, 1, 128)] (2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape, use_keras=use_keras)
def test_extract_features_returns_correct_shapes_299(self): def test_extract_features_returns_correct_shapes_299(self, use_keras):
image_height = 299 image_height = 299
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
...@@ -83,9 +120,10 @@ class SsdMobilenetV2FeatureExtractorTest( ...@@ -83,9 +120,10 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 2, 2, 256), (2, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape, use_keras=use_keras)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): def test_extract_features_returns_correct_shapes_enforcing_min_depth(
self, use_keras):
image_height = 299 image_height = 299
image_width = 299 image_width = 299
depth_multiplier = 0.5**12 depth_multiplier = 0.5**12
...@@ -95,9 +133,10 @@ class SsdMobilenetV2FeatureExtractorTest( ...@@ -95,9 +133,10 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 2, 2, 32), (2, 1, 1, 32)] (2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape, use_keras=use_keras)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
self, use_keras):
image_height = 299 image_height = 299
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
...@@ -107,35 +146,45 @@ class SsdMobilenetV2FeatureExtractorTest( ...@@ -107,35 +146,45 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 2, 2, 256), (2, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape, use_keras=use_keras)
def test_extract_features_raises_error_with_invalid_image_size(self): def test_extract_features_raises_error_with_invalid_image_size(
self, use_keras):
image_height = 32 image_height = 32
image_width = 32 image_width = 32
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
self.check_extract_features_raises_error_with_invalid_image_size( self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier, pad_to_multiple) image_height, image_width, depth_multiplier, pad_to_multiple,
use_keras=use_keras)
def test_preprocess_returns_correct_value_range(self): def test_preprocess_returns_correct_value_range(self, use_keras):
image_height = 128 image_height = 128
image_width = 128 image_width = 128
depth_multiplier = 1 depth_multiplier = 1
pad_to_multiple = 1 pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3) test_image = np.random.rand(4, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier, feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple) pad_to_multiple,
use_keras=use_keras)
preprocessed_image = feature_extractor.preprocess(test_image) preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
def test_variables_only_created_in_scope(self): def test_variables_only_created_in_scope(self, use_keras):
depth_multiplier = 1 depth_multiplier = 1
pad_to_multiple = 1 pad_to_multiple = 1
scope_name = 'MobilenetV2' scope_name = 'MobilenetV2'
self.check_feature_extractor_variables_under_scope( self.check_feature_extractor_variables_under_scope(
depth_multiplier, pad_to_multiple, scope_name) depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras)
def test_variable_count(self, use_keras):
depth_multiplier = 1
pad_to_multiple = 1
variables = self.get_feature_extractor_variables(
depth_multiplier, pad_to_multiple, use_keras=use_keras)
self.assertEqual(len(variables), 292)
def test_has_fused_batchnorm(self): def test_has_fused_batchnorm(self, use_keras):
image_height = 40 image_height = 40
image_width = 40 image_width = 40
depth_multiplier = 1 depth_multiplier = 1
...@@ -143,9 +192,13 @@ class SsdMobilenetV2FeatureExtractorTest( ...@@ -143,9 +192,13 @@ class SsdMobilenetV2FeatureExtractorTest(
image_placeholder = tf.placeholder(tf.float32, image_placeholder = tf.placeholder(tf.float32,
[1, image_height, image_width, 3]) [1, image_height, image_width, 3])
feature_extractor = self._create_feature_extractor(depth_multiplier, feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple) pad_to_multiple,
use_keras=use_keras)
preprocessed_image = feature_extractor.preprocess(image_placeholder) preprocessed_image = feature_extractor.preprocess(image_placeholder)
_ = feature_extractor.extract_features(preprocessed_image) if use_keras:
_ = feature_extractor(preprocessed_image)
else:
_ = feature_extractor.extract_features(preprocessed_image)
self.assertTrue(any(op.type == 'FusedBatchNorm' self.assertTrue(any(op.type == 'FusedBatchNorm'
for op in tf.get_default_graph().get_operations())) for op in tf.get_default_graph().get_operations()))
......