Commit 27b4acd4 authored by Aman Gupta

Merge remote-tracking branch 'upstream/master'

parents 5133522f d4e1f97f
......@@ -14,8 +14,12 @@
# ==============================================================================
"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
import functools
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.builders import box_predictor_builder
......@@ -23,11 +27,14 @@ from object_detection.builders import hyperparams_builder
from object_detection.builders import post_processing_builder
from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import losses
from object_detection.core import post_processing
from object_detection.core import target_assigner
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
from object_detection.protos import post_processing_pb2
from object_detection.utils import ops
from object_detection.utils import test_case
from object_detection.utils import test_utils
slim = tf.contrib.slim
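This diff also pulls in absl's parameterized test support (imported above) so the same test body can run with and without static shapes. A minimal, self-contained sketch of that pattern, using a toy test unrelated to this file:

from absl.testing import parameterized
import tensorflow as tf


class StaticShapesToyTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters({'use_static_shapes': False},
                            {'use_static_shapes': True})
  def test_runs_in_both_modes(self, use_static_shapes):
    # In the real tests this flag steers execute vs. execute_cpu;
    # here it only demonstrates the parameterization.
    self.assertIn(use_static_shapes, (True, False))


if __name__ == '__main__':
  tf.test.main()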
......@@ -60,7 +67,7 @@ class FakeFasterRCNNFeatureExtractor(
num_outputs=3, kernel_size=1, scope='layer2')
class FasterRCNNMetaArchTestBase(tf.test.TestCase):
class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
"""Base class to test Faster R-CNN and R-FCN meta architectures."""
def _build_arg_scope_with_hyperparams(self,
......@@ -157,7 +164,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
masks_are_class_agnostic=False,
use_matmul_crop_and_resize=False,
clip_anchors_to_image=False,
use_matmul_gather_in_matcher=False):
use_matmul_gather_in_matcher=False,
use_static_shapes=False):
def image_resizer_fn(image, masks=None):
"""Fake image resizer function."""
......@@ -220,11 +228,18 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
first_stage_box_predictor_depth = 512
first_stage_minibatch_size = 3
first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=0.5, is_static=False)
positive_fraction=0.5, is_static=use_static_shapes)
first_stage_nms_score_threshold = -1.0
first_stage_nms_iou_threshold = 1.0
first_stage_max_proposals = first_stage_max_proposals
first_stage_non_max_suppression_fn = functools.partial(
post_processing.batch_multiclass_non_max_suppression,
score_thresh=first_stage_nms_score_threshold,
iou_thresh=first_stage_nms_iou_threshold,
max_size_per_class=first_stage_max_proposals,
max_total_size=first_stage_max_proposals,
use_static_shapes=use_static_shapes)
first_stage_localization_loss_weight = 1.0
first_stage_objectness_loss_weight = 1.0
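The hunk above replaces the separate RPN NMS threshold arguments with a single callable whose configuration is pre-bound through functools.partial. A minimal sketch of that pattern; batched_nms below is a toy stand-in, not the real post_processing op:

import functools


def batched_nms(boxes, scores, score_thresh=0.0, iou_thresh=0.5,
                max_size_per_class=10, max_total_size=10):
  """Toy stand-in that simply keeps the highest-scoring boxes."""
  del iou_thresh, max_size_per_class  # unused in this illustration
  order = sorted(range(len(scores)), key=lambda i: -scores[i])
  keep = [i for i in order if scores[i] >= score_thresh][:max_total_size]
  return [boxes[i] for i in keep], [scores[i] for i in keep]


# Bind the configuration once, as the test does for the first stage;
# callers afterwards pass only the boxes and scores.
first_stage_nms_fn = functools.partial(
    batched_nms, score_thresh=-1.0, iou_thresh=1.0,
    max_size_per_class=8, max_total_size=8)

boxes, scores = first_stage_nms_fn([[0, 0, 1, 1], [0, 0, 2, 2]], [0.2, 0.9])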
......@@ -246,7 +261,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
second_stage_non_max_suppression_fn, _ = post_processing_builder.build(
post_processing_config)
second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=1.0, is_static=False)
positive_fraction=1.0, is_static=use_static_shapes)
second_stage_score_conversion_fn = tf.identity
second_stage_localization_loss_weight = 1.0
......@@ -268,6 +283,9 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
loc_loss_weight=second_stage_localization_loss_weight,
max_negatives_per_positive=None)
crop_and_resize_fn = (
ops.matmul_crop_and_resize
if use_matmul_crop_and_resize else ops.native_crop_and_resize)
common_kwargs = {
'is_training': is_training,
'num_classes': num_classes,
......@@ -284,8 +302,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
'first_stage_minibatch_size': first_stage_minibatch_size,
'first_stage_sampler': first_stage_sampler,
'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
'first_stage_non_max_suppression_fn':
first_stage_non_max_suppression_fn,
'first_stage_max_proposals': first_stage_max_proposals,
'first_stage_localization_loss_weight':
first_stage_localization_loss_weight,
......@@ -304,8 +322,10 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'second_stage_classification_loss':
second_stage_classification_loss,
'hard_example_miner': hard_example_miner,
'use_matmul_crop_and_resize': use_matmul_crop_and_resize,
'clip_anchors_to_image': clip_anchors_to_image
'crop_and_resize_fn': crop_and_resize_fn,
'clip_anchors_to_image': clip_anchors_to_image,
'use_static_shapes': use_static_shapes,
'resize_masks': True,
}
return self._get_model(
......@@ -412,7 +432,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
anchors = prediction_out['anchors']
self.assertTrue(len(anchors.shape) == 2 and anchors.shape[1] == 4)
num_anchors_out = anchors.shape[0]
self.assertTrue(num_anchors_out < num_anchors_strict_upper_bound)
self.assertLess(num_anchors_out, num_anchors_strict_upper_bound)
self.assertTrue(np.all(np.greater_equal(anchors, 0)))
self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
......@@ -484,94 +504,97 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
for key in expected_shapes:
self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
def _test_predict_gives_correct_shapes_in_train_mode_both_stages(
self, use_matmul_crop_and_resize=False,
clip_anchors_to_image=False):
test_graph = tf.Graph()
with test_graph.as_default():
def test_predict_gives_correct_shapes_in_train_mode_both_stages(
self,
use_static_shapes=False):
batch_size = 2
image_size = 10
max_num_proposals = 7
initial_crop_size = 3
maxpool_stride = 1
def graph_fn(images, gt_boxes, gt_classes, gt_weights):
"""Function to construct tf graph for the test."""
model = self._build_model(
is_training=True,
number_of_stages=2,
second_stage_batch_size=7,
predict_masks=False,
use_matmul_crop_and_resize=use_matmul_crop_and_resize,
clip_anchors_to_image=clip_anchors_to_image)
use_matmul_crop_and_resize=use_static_shapes,
clip_anchors_to_image=use_static_shapes,
use_static_shapes=use_static_shapes)
batch_size = 2
image_size = 10
max_num_proposals = 7
initial_crop_size = 3
maxpool_stride = 1
image_shape = (batch_size, image_size, image_size, 3)
preprocessed_inputs = tf.zeros(image_shape, dtype=tf.float32)
groundtruth_boxes_list = [
tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
groundtruth_classes_list = [
tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
groundtruth_weights_list = [
tf.constant([1, 1], dtype=tf.float32),
tf.constant([1, 1], dtype=tf.float32)]
_, true_image_shapes = model.preprocess(tf.zeros(image_shape))
preprocessed_inputs, true_image_shapes = model.preprocess(images)
model.provide_groundtruth(
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_weights_list=groundtruth_weights_list)
groundtruth_boxes_list=tf.unstack(gt_boxes),
groundtruth_classes_list=tf.unstack(gt_classes),
groundtruth_weights_list=tf.unstack(gt_weights))
result_tensor_dict = model.predict(preprocessed_inputs, true_image_shapes)
expected_shapes = {
'rpn_box_predictor_features':
(2, image_size, image_size, 512),
'rpn_features_to_crop': (2, image_size, image_size, 3),
'image_shape': (4,),
'refined_box_encodings': (2 * max_num_proposals, 2, 4),
'class_predictions_with_background': (2 * max_num_proposals, 2 + 1),
'num_proposals': (2,),
'proposal_boxes': (2, max_num_proposals, 4),
'proposal_boxes_normalized': (2, max_num_proposals, 4),
'box_classifier_features':
self._get_box_classifier_features_shape(image_size,
batch_size,
max_num_proposals,
initial_crop_size,
maxpool_stride,
3)
}
init_op = tf.global_variables_initializer()
with self.test_session(graph=test_graph) as sess:
sess.run(init_op)
tensor_dict_out = sess.run(result_tensor_dict)
self.assertEqual(set(tensor_dict_out.keys()),
set(expected_shapes.keys()).union(set([
'rpn_box_encodings',
'rpn_objectness_predictions_with_background',
'anchors'])))
for key in expected_shapes:
self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
anchors_shape_out = tensor_dict_out['anchors'].shape
self.assertEqual(2, len(anchors_shape_out))
self.assertEqual(4, anchors_shape_out[1])
num_anchors_out = anchors_shape_out[0]
self.assertAllEqual(tensor_dict_out['rpn_box_encodings'].shape,
(2, num_anchors_out, 4))
self.assertAllEqual(
tensor_dict_out['rpn_objectness_predictions_with_background'].shape,
(2, num_anchors_out, 2))
def test_predict_gives_correct_shapes_in_train_mode_both_stages(self):
self._test_predict_gives_correct_shapes_in_train_mode_both_stages()
def test_predict_gives_correct_shapes_in_train_mode_matmul_crop_resize(self):
self._test_predict_gives_correct_shapes_in_train_mode_both_stages(
use_matmul_crop_and_resize=True)
return (result_tensor_dict['refined_box_encodings'],
result_tensor_dict['class_predictions_with_background'],
result_tensor_dict['proposal_boxes'],
result_tensor_dict['proposal_boxes_normalized'],
result_tensor_dict['anchors'],
result_tensor_dict['rpn_box_encodings'],
result_tensor_dict['rpn_objectness_predictions_with_background'],
result_tensor_dict['rpn_features_to_crop'],
result_tensor_dict['rpn_box_predictor_features'],
)
image_shape = (batch_size, image_size, image_size, 3)
images = np.zeros(image_shape, dtype=np.float32)
gt_boxes = np.stack([
np.array([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=np.float32),
np.array([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=np.float32)
])
gt_classes = np.stack([
np.array([[1, 0], [0, 1]], dtype=np.float32),
np.array([[1, 0], [1, 0]], dtype=np.float32)
])
gt_weights = np.stack([
np.array([1, 1], dtype=np.float32),
np.array([1, 1], dtype=np.float32)
])
if use_static_shapes:
results = self.execute(graph_fn,
[images, gt_boxes, gt_classes, gt_weights])
else:
results = self.execute_cpu(graph_fn,
[images, gt_boxes, gt_classes, gt_weights])
def test_predict_gives_correct_shapes_in_train_mode_clip_anchors(self):
self._test_predict_gives_correct_shapes_in_train_mode_both_stages(
clip_anchors_to_image=True)
expected_shapes = {
'rpn_box_predictor_features': (2, image_size, image_size, 512),
'rpn_features_to_crop': (2, image_size, image_size, 3),
'refined_box_encodings': (2 * max_num_proposals, 2, 4),
'class_predictions_with_background': (2 * max_num_proposals, 2 + 1),
'proposal_boxes': (2, max_num_proposals, 4),
'rpn_box_encodings': (2, image_size * image_size * 9, 4),
'proposal_boxes_normalized': (2, max_num_proposals, 4),
'box_classifier_features':
self._get_box_classifier_features_shape(
image_size, batch_size, max_num_proposals, initial_crop_size,
maxpool_stride, 3),
'rpn_objectness_predictions_with_background':
(2, image_size * image_size * 9, 2)
}
# TODO(rathodv): Possibly change utils/test_case.py to accept and return
# dictionaries so we don't have to rely on the order of tensors.
self.assertAllEqual(results[0].shape,
expected_shapes['refined_box_encodings'])
self.assertAllEqual(results[1].shape,
expected_shapes['class_predictions_with_background'])
self.assertAllEqual(results[2].shape, expected_shapes['proposal_boxes'])
self.assertAllEqual(results[3].shape,
expected_shapes['proposal_boxes_normalized'])
anchors_shape = results[4].shape
self.assertAllEqual(results[5].shape,
[batch_size, anchors_shape[0], 4])
self.assertAllEqual(results[6].shape,
[batch_size, anchors_shape[0], 2])
self.assertAllEqual(results[7].shape,
expected_shapes['rpn_features_to_crop'])
self.assertAllEqual(results[8].shape,
expected_shapes['rpn_box_predictor_features'])
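As the TODO above notes, execute and execute_cpu return outputs positionally, so the assertions have to mirror the tuple order by hand. One possible workaround, sketched on top of the execute_cpu(graph_fn, inputs) interface already used in this test (run_as_dict itself is hypothetical, not part of utils/test_case.py):

def run_as_dict(test_obj, dict_graph_fn, inputs):
  """Hypothetical helper: run a dict-returning graph_fn and re-key the outputs."""
  keys = []

  def ordered_graph_fn(*args):
    outputs = dict_graph_fn(*args)
    del keys[:]                      # tolerate the function being traced twice
    keys.extend(sorted(outputs))
    return tuple(outputs[k] for k in keys)

  results = test_obj.execute_cpu(ordered_graph_fn, inputs)
  return dict(zip(keys, results))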
def _test_postprocess_first_stage_only_inference_mode(
self, pad_to_max_dimension=None):
......@@ -848,10 +871,10 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
loss_dict_out = sess.run(loss_dict)
self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0)
self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0)
self.assertTrue('Loss/BoxClassifierLoss/localization_loss'
not in loss_dict_out)
self.assertTrue('Loss/BoxClassifierLoss/classification_loss'
not in loss_dict_out)
self.assertNotIn('Loss/BoxClassifierLoss/localization_loss',
loss_dict_out)
self.assertNotIn('Loss/BoxClassifierLoss/classification_loss',
loss_dict_out)
# TODO(rathodv): Split test into two - with and without masks.
def test_loss_full(self):
......@@ -1157,22 +1180,58 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'Loss/BoxClassifierLoss/classification_loss'], 0)
self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0)
def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(self):
model = self._build_model(
is_training=True, number_of_stages=2, second_stage_batch_size=6)
def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(
self, use_static_shapes=False, shared_boxes=False):
batch_size = 2
anchors = tf.constant(
first_stage_max_proposals = 8
second_stage_batch_size = 6
num_classes = 2
def graph_fn(anchors, rpn_box_encodings,
rpn_objectness_predictions_with_background, images,
num_proposals, proposal_boxes, refined_box_encodings,
class_predictions_with_background, groundtruth_boxes,
groundtruth_classes):
"""Function to construct tf graph for the test."""
model = self._build_model(
is_training=True, number_of_stages=2,
second_stage_batch_size=second_stage_batch_size,
first_stage_max_proposals=first_stage_max_proposals,
num_classes=num_classes,
use_matmul_crop_and_resize=use_static_shapes,
clip_anchors_to_image=use_static_shapes,
use_static_shapes=use_static_shapes)
prediction_dict = {
'rpn_box_encodings': rpn_box_encodings,
'rpn_objectness_predictions_with_background':
rpn_objectness_predictions_with_background,
'image_shape': tf.shape(images),
'anchors': anchors,
'refined_box_encodings': refined_box_encodings,
'class_predictions_with_background':
class_predictions_with_background,
'proposal_boxes': proposal_boxes,
'num_proposals': num_proposals
}
_, true_image_shapes = model.preprocess(images)
model.provide_groundtruth(tf.unstack(groundtruth_boxes),
tf.unstack(groundtruth_classes))
loss_dict = model.loss(prediction_dict, true_image_shapes)
return (loss_dict['Loss/RPNLoss/localization_loss'],
loss_dict['Loss/RPNLoss/objectness_loss'],
loss_dict['Loss/BoxClassifierLoss/localization_loss'],
loss_dict['Loss/BoxClassifierLoss/classification_loss'])
anchors = np.array(
[[0, 0, 16, 16],
[0, 16, 16, 32],
[16, 0, 32, 16],
[16, 16, 32, 32]], dtype=tf.float32)
rpn_box_encodings = tf.zeros(
[batch_size,
anchors.get_shape().as_list()[0],
BOX_CODE_SIZE], dtype=tf.float32)
[16, 16, 32, 32]], dtype=np.float32)
rpn_box_encodings = np.zeros(
[batch_size, anchors.shape[1], BOX_CODE_SIZE], dtype=np.float32)
# use different numbers for the objectness category to break ties in
# order of boxes returned by NMS
rpn_objectness_predictions_with_background = tf.constant(
rpn_objectness_predictions_with_background = np.array(
[[[-10, 13],
[10, -10],
[10, -11],
......@@ -1180,13 +1239,13 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
[[-10, 13],
[10, -10],
[10, -11],
[10, -12]]], dtype=tf.float32)
image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
[10, -12]]], dtype=np.float32)
images = np.zeros([batch_size, 32, 32, 3], dtype=np.float32)
# box_classifier_batch_size is 6, but here we assume that the number of
# actual proposals (not counting zero paddings) is fewer.
num_proposals = tf.constant([3, 2], dtype=tf.int32)
proposal_boxes = tf.constant(
num_proposals = np.array([3, 2], dtype=np.int32)
proposal_boxes = np.array(
[[[0, 0, 16, 16],
[0, 16, 16, 32],
[16, 0, 32, 16],
......@@ -1198,13 +1257,13 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
[0, 0, 0, 0], # begin paddings
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]], dtype=tf.float32)
[0, 0, 0, 0]]], dtype=np.float32)
refined_box_encodings = tf.zeros(
(batch_size * model.max_num_proposals,
model.num_classes,
BOX_CODE_SIZE), dtype=tf.float32)
class_predictions_with_background = tf.constant(
refined_box_encodings = np.zeros(
(batch_size * second_stage_batch_size, 1
if shared_boxes else num_classes, BOX_CODE_SIZE),
dtype=np.float32)
class_predictions_with_background = np.array(
[[-10, 10, -10], # first image
[10, -10, -10],
[10, -10, -10],
......@@ -1216,7 +1275,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
[0, 0, 0], # begin paddings
[0, 0, 0],
[0, 0, 0],
[0, 0, 0],], dtype=tf.float32)
[0, 0, 0],], dtype=np.float32)
# The first groundtruth box is 4/5 of the anchor size in both directions
# experiencing a loss of:
......@@ -1225,38 +1284,29 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
# The second groundtruth box is identical to the prediction and thus
# experiences zero loss.
# Total average loss is (abs(5 * log(4/5)) - .5) / 3.
groundtruth_boxes_list = [
tf.constant([[0.05, 0.05, 0.45, 0.45]], dtype=tf.float32),
tf.constant([[0.0, 0.0, 0.5, 0.5]], dtype=tf.float32)]
groundtruth_classes_list = [tf.constant([[1, 0]], dtype=tf.float32),
tf.constant([[0, 1]], dtype=tf.float32)]
exp_loc_loss = (-5 * np.log(.8) - 0.5) / 3.0
groundtruth_boxes = np.stack([
np.array([[0.05, 0.05, 0.45, 0.45]], dtype=np.float32),
np.array([[0.0, 0.0, 0.5, 0.5]], dtype=np.float32)])
groundtruth_classes = np.stack([np.array([[1, 0]], dtype=np.float32),
np.array([[0, 1]], dtype=np.float32)])
execute_fn = self.execute_cpu
if use_static_shapes:
execute_fn = self.execute
results = execute_fn(graph_fn, [
anchors, rpn_box_encodings, rpn_objectness_predictions_with_background,
images, num_proposals, proposal_boxes, refined_box_encodings,
class_predictions_with_background, groundtruth_boxes,
groundtruth_classes
])
prediction_dict = {
'rpn_box_encodings': rpn_box_encodings,
'rpn_objectness_predictions_with_background':
rpn_objectness_predictions_with_background,
'image_shape': image_shape,
'anchors': anchors,
'refined_box_encodings': refined_box_encodings,
'class_predictions_with_background': class_predictions_with_background,
'proposal_boxes': proposal_boxes,
'num_proposals': num_proposals
}
_, true_image_shapes = model.preprocess(tf.zeros(image_shape))
model.provide_groundtruth(groundtruth_boxes_list,
groundtruth_classes_list)
loss_dict = model.loss(prediction_dict, true_image_shapes)
exp_loc_loss = (-5 * np.log(.8) - 0.5) / 3.0
with self.test_session() as sess:
loss_dict_out = sess.run(loss_dict)
self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'],
exp_loc_loss)
self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0)
self.assertAllClose(loss_dict_out[
'Loss/BoxClassifierLoss/localization_loss'], exp_loc_loss)
self.assertAllClose(loss_dict_out[
'Loss/BoxClassifierLoss/classification_loss'], 0)
self.assertAllClose(results[0], exp_loc_loss, rtol=1e-4, atol=1e-4)
self.assertAllClose(results[1], 0.0)
self.assertAllClose(results[2], exp_loc_loss, rtol=1e-4, atol=1e-4)
self.assertAllClose(results[3], 0.0)
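A quick numeric check of exp_loc_loss used above; a sketch assuming the default delta of 1.0 in the smooth L1 localization loss:

import numpy as np


def smooth_l1(x):
  """SmoothL1 (Huber) with delta = 1, as assumed above."""
  return 0.5 * x ** 2 if abs(x) < 1.0 else abs(x) - 0.5

encoded = 5 * np.log(4.0 / 5.0)      # scale-encoded offset of a 4/5-sized box
per_box = smooth_l1(encoded)         # equals -5 * log(.8) - 0.5 since |x| > 1
print(per_box / 3.0)                 # ~0.2052, matching exp_loc_loss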
def test_loss_with_hard_mining(self):
model = self._build_model(is_training=True,
......@@ -1346,10 +1396,14 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
self.assertAllClose(loss_dict_out[
'Loss/BoxClassifierLoss/classification_loss'], 0)
def test_loss_full_with_shared_boxes(self):
model = self._build_model(
is_training=True, number_of_stages=2, second_stage_batch_size=6)
def test_loss_with_hard_mining_and_losses_mask(self):
model = self._build_model(is_training=True,
number_of_stages=2,
second_stage_batch_size=None,
first_stage_max_proposals=6,
hard_mining=True)
batch_size = 2
number_of_proposals = 3
anchors = tf.constant(
[[0, 0, 16, 16],
[0, 16, 16, 32],
......@@ -1361,63 +1415,77 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
BOX_CODE_SIZE], dtype=tf.float32)
# use different numbers for the objectness category to break ties in
# order of boxes returned by NMS
rpn_objectness_predictions_with_background = tf.constant([
[[-10, 13],
[10, -10],
[10, -11],
[-10, 12]],
[[10, -10],
[-10, 13],
[-10, 12],
[10, -11]]], dtype=tf.float32)
rpn_objectness_predictions_with_background = tf.constant(
[[[-10, 13],
[-10, 12],
[10, -11],
[10, -12]],
[[-10, 13],
[-10, 12],
[10, -11],
[10, -12]]], dtype=tf.float32)
image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
num_proposals = tf.constant([6, 6], dtype=tf.int32)
# box_classifier_batch_size is 6, but here we assume that the number of
# actual proposals (not counting zero paddings) is fewer (3).
num_proposals = tf.constant([number_of_proposals, number_of_proposals],
dtype=tf.int32)
proposal_boxes = tf.constant(
2 * [[[0, 0, 16, 16],
[0, 16, 16, 32],
[16, 0, 32, 16],
[16, 16, 32, 32],
[0, 0, 16, 16],
[0, 16, 16, 32]]], dtype=tf.float32)
[[[0, 0, 16, 16], # first image
[0, 16, 16, 32],
[16, 0, 32, 16],
[0, 0, 0, 0], # begin paddings
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 0, 16, 16], # second image
[0, 16, 16, 32],
[16, 0, 32, 16],
[0, 0, 0, 0], # begin paddings
[0, 0, 0, 0],
[0, 0, 0, 0]]], dtype=tf.float32)
refined_box_encodings = tf.zeros(
(batch_size * model.max_num_proposals,
1, # one box shared among all the classes
model.num_classes,
BOX_CODE_SIZE), dtype=tf.float32)
class_predictions_with_background = tf.constant(
[[-10, 10, -10], # first image
[10, -10, -10],
[10, -10, -10],
[-10, -10, 10],
[-10, 10, -10],
[10, -10, -10],
[10, -10, -10], # second image
[-10, 10, -10],
[-10, 10, -10],
[10, -10, -10],
[0, 0, 0], # begin paddings
[0, 0, 0],
[0, 0, 0],
[-10, 10, -10], # second image
[-10, -10, 10],
[10, -10, -10],
[-10, 10, -10]], dtype=tf.float32)
mask_predictions_logits = 20 * tf.ones((batch_size *
model.max_num_proposals,
model.num_classes,
14, 14),
dtype=tf.float32)
[0, 0, 0], # begin paddings
[0, 0, 0],
[0, 0, 0]], dtype=tf.float32)
# The first groundtruth box is 4/5 of the anchor size in both directions
# experiencing a loss of:
# 2 * SmoothL1(5 * log(4/5)) / (num_proposals * batch_size)
# = 2 * (abs(5 * log(4/5)) - .5) / (3 * 2)
# The second groundtruth box is 46/50 of the anchor size in both directions
# experiencing a loss of:
# 2 * SmoothL1(5 * log(46/50)) / (num_proposals * batch_size)
# = 2 * (.5 * (5 * log(.92))^2) / (3 * 2).
# Since the first groundtruth box experiences greater loss, and we have
# set num_hard_examples=1 in the HardMiner, the final localization loss
# corresponds to that of the first groundtruth box.
groundtruth_boxes_list = [
tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
tf.constant([[0.05, 0.05, 0.45, 0.45],
[0.02, 0.52, 0.48, 0.98]], dtype=tf.float32),
tf.constant([[0.05, 0.05, 0.45, 0.45],
[0.02, 0.52, 0.48, 0.98]], dtype=tf.float32)]
groundtruth_classes_list = [
tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
tf.constant([[1, 0], [0, 1]], dtype=tf.float32)]
is_annotated_list = [tf.constant(True, dtype=tf.bool),
tf.constant(False, dtype=tf.bool)]
exp_loc_loss = (2 * (-5 * np.log(.8) - 0.5) /
(number_of_proposals * batch_size))
# Set all elements of groundtruth mask to 1.0. In this case all proposal
# crops of the groundtruth masks should return a mask that covers the entire
# proposal. Thus, if mask_predictions_logits element values are all greater
# than 20, the loss should be zero.
groundtruth_masks_list = [tf.convert_to_tensor(np.ones((2, 32, 32)),
dtype=tf.float32),
tf.convert_to_tensor(np.ones((2, 32, 32)),
dtype=tf.float32)]
prediction_dict = {
'rpn_box_encodings': rpn_box_encodings,
'rpn_objectness_predictions_with_background':
......@@ -1427,24 +1495,20 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'refined_box_encodings': refined_box_encodings,
'class_predictions_with_background': class_predictions_with_background,
'proposal_boxes': proposal_boxes,
'num_proposals': num_proposals,
'mask_predictions': mask_predictions_logits
'num_proposals': num_proposals
}
_, true_image_shapes = model.preprocess(tf.zeros(image_shape))
model.provide_groundtruth(groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list)
is_annotated_list=is_annotated_list)
loss_dict = model.loss(prediction_dict, true_image_shapes)
with self.test_session() as sess:
loss_dict_out = sess.run(loss_dict)
self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0)
self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0)
self.assertAllClose(loss_dict_out[
'Loss/BoxClassifierLoss/localization_loss'], 0)
'Loss/BoxClassifierLoss/localization_loss'], exp_loc_loss)
self.assertAllClose(loss_dict_out[
'Loss/BoxClassifierLoss/classification_loss'], 0)
self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0)
def test_restore_map_for_classification_ckpt(self):
# Define mock tensorflow classification graph and save variables.
......
......@@ -62,11 +62,11 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_sampler,
first_stage_nms_score_threshold,
first_stage_nms_iou_threshold,
first_stage_non_max_suppression_fn,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
crop_and_resize_fn,
second_stage_target_assigner,
second_stage_rfcn_box_predictor,
second_stage_batch_size,
......@@ -79,8 +79,9 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
hard_example_miner,
parallel_iterations=16,
add_summaries=True,
use_matmul_crop_and_resize=False,
clip_anchors_to_image=False):
clip_anchors_to_image=False,
use_static_shapes=False,
resize_masks=False):
"""RFCNMetaArch Constructor.
Args:
......@@ -123,18 +124,22 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
only called "batch_size" due to terminology from the Faster R-CNN paper.
first_stage_sampler: The sampler for the boxes used to calculate the RPN
loss after the first stage.
first_stage_nms_score_threshold: Score threshold for non max suppression
for the Region Proposal Network (RPN). This value is expected to be in
[0, 1] as it is applied directly after a softmax transformation. The
recommended value for Faster R-CNN is 0.
first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold
for performing Non-Max Suppression (NMS) on the boxes predicted by the
Region Proposal Network (RPN).
first_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
callable that takes `boxes`, `scores` and optional `clip_window` (with
all other inputs already set) and returns a dictionary containing
tensors with keys: `detection_boxes`, `detection_scores`,
`detection_classes`, `num_detections`. This is used to perform non max
suppression on the boxes predicted by the Region Proposal Network
(RPN).
See `post_processing.batch_multiclass_non_max_suppression` for the type
and shape of these tensors.
first_stage_max_proposals: Maximum number of boxes to retain after
performing Non-Max Suppression (NMS) on the boxes predicted by the
Region Proposal Network (RPN).
first_stage_localization_loss_weight: A float
first_stage_objectness_loss_weight: A float
crop_and_resize_fn: A differentiable resampler to use for cropping RPN
proposal features.
second_stage_target_assigner: Target assigner to use for second stage of
R-FCN. If the model is configured with multiple prediction heads, this
target assigner is used to generate targets for all heads (with the
......@@ -168,12 +173,13 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
in parallel for calls to tf.map_fn.
add_summaries: boolean (default: True) controlling whether summary ops
should be added to tensorflow graph.
use_matmul_crop_and_resize: Force the use of matrix multiplication based
crop and resize instead of standard tf.image.crop_and_resize while
computing second stage input feature maps.
clip_anchors_to_image: The anchors generated are clipped to the
window size without filtering the nonoverlapping anchors. This generates
a static number of anchors. This argument is unused.
use_static_shapes: If True, uses implementation of ops with static shape
guarantees.
resize_masks: Indicates whether the masks present in the groundtruth
should be resized in the model with `image_resizer_fn`.
Raises:
ValueError: If `second_stage_batch_size` > `first_stage_max_proposals`
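For reference, a hedged stub of a callable that matches the contract documented above; names and shapes are purely illustrative, and in practice the value passed in is the pre-bound post_processing.batch_multiclass_non_max_suppression:

import tensorflow as tf


def stub_first_stage_nms(boxes, scores, clip_window=None):
  """Illustrative stub only: returns tensors under the documented output keys."""
  del clip_window  # a real implementation would clip boxes to this window
  detection_scores = tf.reduce_max(scores, axis=-1)
  return {
      'detection_boxes': boxes,
      'detection_scores': detection_scores,
      'detection_classes': tf.zeros_like(detection_scores),
      'num_detections': tf.fill([tf.shape(boxes)[0]], tf.shape(boxes)[1]),
  }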
......@@ -196,11 +202,11 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_sampler,
first_stage_nms_score_threshold,
first_stage_nms_iou_threshold,
first_stage_non_max_suppression_fn,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
crop_and_resize_fn,
None, # initial_crop_size is not used in R-FCN
None, # maxpool_kernel_size is not used in R-FCN
None, # maxpool_stride is not used in R-FCN
......@@ -215,7 +221,11 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
second_stage_classification_loss,
1.0, # second stage mask prediction loss weight isn't used in R-FCN.
hard_example_miner,
parallel_iterations)
parallel_iterations,
add_summaries,
clip_anchors_to_image,
use_static_shapes,
resize_masks)
self._rfcn_box_predictor = second_stage_rfcn_box_predictor
......
......@@ -125,12 +125,13 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams_config,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
use_explicit_padding=False,
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
override_base_feature_extractor_hyperparams=False,
name=None):
"""Constructor.
Args:
......@@ -139,9 +140,9 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_config: A hyperparams.proto object containing
convolution hyperparameters for the layers added on top of the
base feature extractor.
conv_hyperparams: `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
......@@ -156,14 +157,16 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
super(SSDKerasFeatureExtractor, self).__init__()
super(SSDKerasFeatureExtractor, self).__init__(name=name)
self._is_training = is_training
self._depth_multiplier = depth_multiplier
self._min_depth = min_depth
self._pad_to_multiple = pad_to_multiple
self._conv_hyperparams_config = conv_hyperparams_config
self._conv_hyperparams = conv_hyperparams
self._freeze_batchnorm = freeze_batchnorm
self._inplace_batchnorm_update = inplace_batchnorm_update
self._use_explicit_padding = use_explicit_padding
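The new name argument is forwarded straight to the tf.keras.Model constructor. A minimal sketch of the same pattern; TinyFeatureExtractor is illustrative and not part of this codebase:

import tensorflow as tf


class TinyFeatureExtractor(tf.keras.Model):
  """Sketch of forwarding `name` through to tf.keras.Model."""

  def __init__(self, filters=32, name=None):
    # If name is None, Keras auto-generates one from the class name.
    super(TinyFeatureExtractor, self).__init__(name=name)
    self._conv = tf.keras.layers.Conv2D(filters, kernel_size=1, name='layer1')

  def call(self, inputs):
    return self._conv(inputs)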
......@@ -225,10 +228,7 @@ class SSDMetaArch(model.DetectionModel):
box_predictor,
box_coder,
feature_extractor,
matcher,
region_similarity_calculator,
encode_background_as_zeros,
negative_class_weight,
image_resizer_fn,
non_max_suppression_fn,
score_conversion_fn,
......@@ -238,14 +238,14 @@ class SSDMetaArch(model.DetectionModel):
localization_loss_weight,
normalize_loss_by_num_matches,
hard_example_miner,
target_assigner_instance,
add_summaries=True,
normalize_loc_loss_by_codesize=False,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
add_background_class=True,
random_example_sampler=None,
expected_classification_loss_under_sampling=None,
target_assigner_instance=None):
expected_classification_loss_under_sampling=None):
"""SSDMetaArch Constructor.
TODO(rathodv,jonathanhuang): group NMS parameters + score converter into
......@@ -259,13 +259,9 @@ class SSDMetaArch(model.DetectionModel):
box_predictor: a box_predictor.BoxPredictor object.
box_coder: a box_coder.BoxCoder object.
feature_extractor: a SSDFeatureExtractor object.
matcher: a matcher.Matcher object.
region_similarity_calculator: a
region_similarity_calculator.RegionSimilarityCalculator object.
encode_background_as_zeros: boolean determining whether background
targets are to be encoded as an all zeros vector or a one-hot
vector (where background is the 0th class).
negative_class_weight: Weight for confidence loss of negative anchors.
image_resizer_fn: a callable for image resizing. This callable always
takes a rank-3 image tensor (corresponding to a single image) and
returns a rank-3 image tensor, possibly with new spatial dimensions and
......@@ -288,6 +284,7 @@ class SSDMetaArch(model.DetectionModel):
localization_loss_weight: float
normalize_loss_by_num_matches: boolean
hard_example_miner: a losses.HardExampleMiner object (can be None)
target_assigner_instance: target_assigner.TargetAssigner instance to use.
add_summaries: boolean (default: True) controlling whether summary ops
should be added to tensorflow graph.
normalize_loc_loss_by_codesize: whether to normalize localization loss
......@@ -312,7 +309,6 @@ class SSDMetaArch(model.DetectionModel):
the random sampled examples.
expected_classification_loss_under_sampling: If not None, used
to calculate classification loss by background/foreground weighting.
target_assigner_instance: target_assigner.TargetAssigner instance to use.
"""
super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes)
self._is_training = is_training
......@@ -324,8 +320,6 @@ class SSDMetaArch(model.DetectionModel):
self._box_coder = box_coder
self._feature_extractor = feature_extractor
self._matcher = matcher
self._region_similarity_calculator = region_similarity_calculator
self._add_background_class = add_background_class
# Needed for fine-tuning from classification checkpoints whose
......@@ -347,14 +341,7 @@ class SSDMetaArch(model.DetectionModel):
self._unmatched_class_label = tf.constant((self.num_classes + 1) * [0],
tf.float32)
if target_assigner_instance:
self._target_assigner = target_assigner_instance
else:
self._target_assigner = target_assigner.TargetAssigner(
self._region_similarity_calculator,
self._matcher,
self._box_coder,
negative_class_weight=negative_class_weight)
self._target_assigner = target_assigner_instance
self._classification_loss = classification_loss
self._localization_loss = localization_loss
......@@ -523,28 +510,25 @@ class SSDMetaArch(model.DetectionModel):
im_height=image_shape[1],
im_width=image_shape[2]))
if self._box_predictor.is_keras_model:
prediction_dict = self._box_predictor(feature_maps)
predictor_results_dict = self._box_predictor(feature_maps)
else:
with slim.arg_scope([slim.batch_norm],
is_training=(self._is_training and
not self._freeze_batchnorm),
updates_collections=batchnorm_updates_collections):
prediction_dict = self._box_predictor.predict(
predictor_results_dict = self._box_predictor.predict(
feature_maps, self._anchor_generator.num_anchors_per_location())
box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
box_encodings = tf.squeeze(box_encodings, axis=2)
class_predictions_with_background = tf.concat(
prediction_dict['class_predictions_with_background'], axis=1)
predictions_dict = {
'preprocessed_inputs': preprocessed_inputs,
'box_encodings': box_encodings,
'class_predictions_with_background':
class_predictions_with_background,
'feature_maps': feature_maps,
'anchors': self._anchors.get()
}
for prediction_key, prediction_list in iter(predictor_results_dict.items()):
prediction = tf.concat(prediction_list, axis=1)
if (prediction_key == 'box_encodings' and prediction.shape.ndims == 4 and
prediction.shape[2] == 1):
prediction = tf.squeeze(prediction, axis=2)
predictions_dict[prediction_key] = prediction
self._batched_prediction_tensor_names = [x for x in predictions_dict
if x != 'anchors']
return predictions_dict
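The loop above generalizes the previously hard-coded handling of box_encodings and class_predictions_with_background: every predictor output is a list of per-feature-map tensors that gets concatenated along the anchor dimension, with a singleton class dimension squeezed out of box encodings. A small sketch of that concat-and-squeeze step on dummy tensors:

import tensorflow as tf

# Two feature maps contributing 4 and 6 anchors respectively.
per_feature_map = [tf.zeros([2, 4, 1, 4]), tf.zeros([2, 6, 1, 4])]
box_encodings = tf.concat(per_feature_map, axis=1)       # [2, 10, 1, 4]
if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
  box_encodings = tf.squeeze(box_encodings, axis=2)       # [2, 10, 4]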
......@@ -587,6 +571,10 @@ class SSDMetaArch(model.DetectionModel):
[batch_size, num_anchors, num_classes+1] containing class predictions
(logits) for each of the anchors. Note that this tensor *includes*
background class predictions.
4) mask_predictions: (optional) a 5-D float tensor of shape
[batch_size, num_anchors, q, mask_height, mask_width]. `q` can be
either number of classes or 1 depending on whether a separate mask is
predicted per class.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
......@@ -599,6 +587,8 @@ class SSDMetaArch(model.DetectionModel):
detection_classes: [batch, max_detections]
detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
encoded in the prediction_dict 'box_encodings')
detection_masks: [batch_size, max_detections, mask_height, mask_width]
(optional)
num_detections: [batch]
Raises:
ValueError: if prediction_dict does not contain `box_encodings` or
......@@ -627,13 +617,14 @@ class SSDMetaArch(model.DetectionModel):
if detection_keypoints is not None:
additional_fields = {
fields.BoxListFields.keypoints: detection_keypoints}
(nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields,
num_detections) = self._non_max_suppression_fn(
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
nmsed_additional_fields, num_detections) = self._non_max_suppression_fn(
detection_boxes,
detection_scores,
clip_window=self._compute_clip_window(
preprocessed_images, true_image_shapes),
additional_fields=additional_fields)
clip_window=self._compute_clip_window(preprocessed_images,
true_image_shapes),
additional_fields=additional_fields,
masks=prediction_dict.get('mask_predictions'))
detection_dict = {
fields.DetectionResultFields.detection_boxes: nmsed_boxes,
fields.DetectionResultFields.detection_scores: nmsed_scores,
......@@ -645,6 +636,9 @@ class SSDMetaArch(model.DetectionModel):
fields.BoxListFields.keypoints in nmsed_additional_fields):
detection_dict[fields.DetectionResultFields.detection_keypoints] = (
nmsed_additional_fields[fields.BoxListFields.keypoints])
if nmsed_masks is not None:
detection_dict[
fields.DetectionResultFields.detection_masks] = nmsed_masks
return detection_dict
def loss(self, prediction_dict, true_image_shapes, scope=None):
......@@ -701,16 +695,22 @@ class SSDMetaArch(model.DetectionModel):
batch_cls_weights = tf.multiply(batch_sampled_indicator,
batch_cls_weights)
losses_mask = None
if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
losses_mask = tf.stack(self.groundtruth_lists(
fields.InputDataFields.is_annotated))
location_losses = self._localization_loss(
prediction_dict['box_encodings'],
batch_reg_targets,
ignore_nan_targets=True,
weights=batch_reg_weights)
weights=batch_reg_weights,
losses_mask=losses_mask)
cls_losses = self._classification_loss(
prediction_dict['class_predictions_with_background'],
batch_cls_targets,
weights=batch_cls_weights)
weights=batch_cls_weights,
losses_mask=losses_mask)
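The is_annotated groundtruth field becomes a per-image boolean losses_mask that lets the loss functions drop contributions from unannotated images. A sketch of the effect; the actual masking happens inside the loss implementations, and the values below are illustrative:

import tensorflow as tf

per_image_losses = tf.constant([[0.3, 0.7], [0.4, 0.1]])   # [batch, num_anchors]
losses_mask = tf.constant([True, False])                    # is_annotated per image
masked_losses = per_image_losses * tf.cast(
    tf.expand_dims(losses_mask, 1), tf.float32)             # second row zeroed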
if self._expected_classification_loss_under_sampling:
if cls_losses.get_shape().ndims == 3:
......@@ -734,12 +734,6 @@ class SSDMetaArch(model.DetectionModel):
self._hard_example_miner.summarize()
else:
cls_losses = ops.reduce_sum_trailing_dimensions(cls_losses, ndims=2)
if self._add_summaries:
class_ids = tf.argmax(batch_cls_targets, axis=2)
flattened_class_ids = tf.reshape(class_ids, [-1])
flattened_classification_losses = tf.reshape(cls_losses, [-1])
self._summarize_anchor_classification_loss(
flattened_class_ids, flattened_classification_losses)
localization_loss = tf.reduce_sum(location_losses)
classification_loss = tf.reduce_sum(cls_losses)
......
......@@ -14,105 +14,26 @@
# ==============================================================================
"""Tests for object_detection.meta_architectures.ssd_meta_arch."""
import functools
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from object_detection.core import anchor_generator
from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import box_list
from object_detection.core import losses
from object_detection.core import post_processing
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import target_assigner
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.utils import ops
from object_detection.utils import test_case
from object_detection.meta_architectures import ssd_meta_arch_test_lib
from object_detection.utils import test_utils
slim = tf.contrib.slim
keras = tf.keras.layers
class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
def __init__(self):
super(FakeSSDFeatureExtractor, self).__init__(
is_training=True,
depth_multiplier=0,
min_depth=0,
pad_to_multiple=1,
conv_hyperparams_fn=None)
def preprocess(self, resized_inputs):
return tf.identity(resized_inputs)
def extract_features(self, preprocessed_inputs):
with tf.variable_scope('mock_model'):
features = slim.conv2d(inputs=preprocessed_inputs, num_outputs=32,
kernel_size=1, scope='layer1')
return [features]
class FakeSSDKerasFeatureExtractor(ssd_meta_arch.SSDKerasFeatureExtractor):
def __init__(self):
with tf.name_scope('mock_model'):
super(FakeSSDKerasFeatureExtractor, self).__init__(
is_training=True,
depth_multiplier=0,
min_depth=0,
pad_to_multiple=1,
conv_hyperparams_config=None,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
)
self._conv = keras.Conv2D(filters=32, kernel_size=1, name='layer1')
def preprocess(self, resized_inputs):
return tf.identity(resized_inputs)
def _extract_features(self, preprocessed_inputs, **kwargs):
with tf.name_scope('mock_model'):
return [self._conv(preprocessed_inputs)]
class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
"""Sets up a simple 2x2 anchor grid on the unit square."""
def name_scope(self):
return 'MockAnchorGenerator'
def num_anchors_per_location(self):
return [1]
def _generate(self, feature_map_shape_list, im_height, im_width):
return [box_list.BoxList(
tf.constant([[0, 0, .5, .5],
[0, .5, .5, 1],
[.5, 0, 1, .5],
[1., 1., 1.5, 1.5] # Anchor that is outside clip_window.
], tf.float32))]
def num_anchors(self):
return 4
def _get_value_for_matching_key(dictionary, suffix):
for key in dictionary.keys():
if key.endswith(suffix):
return dictionary[key]
raise ValueError('key not found {}'.format(suffix))
@parameterized.parameters(
{'use_keras': False},
{'use_keras': True},
)
class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
parameterized.TestCase):
def _create_model(self,
apply_hard_mining=True,
......@@ -123,96 +44,25 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
use_expected_classification_loss_under_sampling=False,
minimum_negative_sampling=1,
desired_negative_sampling_ratio=3,
use_keras=False):
is_training = False
num_classes = 1
mock_anchor_generator = MockAnchorGenerator2x2()
if use_keras:
mock_box_predictor = test_utils.MockKerasBoxPredictor(
is_training, num_classes)
else:
mock_box_predictor = test_utils.MockBoxPredictor(
is_training, num_classes)
mock_box_coder = test_utils.MockBoxCoder()
if use_keras:
fake_feature_extractor = FakeSSDKerasFeatureExtractor()
else:
fake_feature_extractor = FakeSSDFeatureExtractor()
mock_matcher = test_utils.MockMatcher()
region_similarity_calculator = sim_calc.IouSimilarity()
encode_background_as_zeros = False
def image_resizer_fn(image):
return [tf.identity(image), tf.shape(image)]
classification_loss = losses.WeightedSigmoidClassificationLoss()
localization_loss = losses.WeightedSmoothL1LocalizationLoss()
non_max_suppression_fn = functools.partial(
post_processing.batch_multiclass_non_max_suppression,
score_thresh=-20.0,
iou_thresh=1.0,
max_size_per_class=5,
max_total_size=5)
classification_loss_weight = 1.0
localization_loss_weight = 1.0
negative_class_weight = 1.0
normalize_loss_by_num_matches = False
hard_example_miner = None
if apply_hard_mining:
# This hard example miner is expected to be a no-op.
hard_example_miner = losses.HardExampleMiner(
num_hard_examples=None,
iou_threshold=1.0)
random_example_sampler = None
if random_example_sampling:
random_example_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=0.5)
target_assigner_instance = target_assigner.TargetAssigner(
region_similarity_calculator,
mock_matcher,
mock_box_coder,
negative_class_weight=negative_class_weight,
weight_regression_loss_by_score=weight_regression_loss_by_score)
expected_classification_loss_under_sampling = None
if use_expected_classification_loss_under_sampling:
expected_classification_loss_under_sampling = functools.partial(
ops.expected_classification_loss_under_sampling,
minimum_negative_sampling=minimum_negative_sampling,
desired_negative_sampling_ratio=desired_negative_sampling_ratio)
code_size = 4
model = ssd_meta_arch.SSDMetaArch(
is_training,
mock_anchor_generator,
mock_box_predictor,
mock_box_coder,
fake_feature_extractor,
mock_matcher,
region_similarity_calculator,
encode_background_as_zeros,
negative_class_weight,
image_resizer_fn,
non_max_suppression_fn,
tf.identity,
classification_loss,
localization_loss,
classification_loss_weight,
localization_loss_weight,
normalize_loss_by_num_matches,
hard_example_miner,
target_assigner_instance=target_assigner_instance,
add_summaries=False,
use_keras=False,
predict_mask=False,
use_static_shapes=False,
nms_max_size_per_class=5):
return super(SsdMetaArchTest, self)._create_model(
model_fn=ssd_meta_arch.SSDMetaArch,
apply_hard_mining=apply_hard_mining,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
add_background_class=add_background_class,
random_example_sampler=random_example_sampler,
expected_classification_loss_under_sampling=
expected_classification_loss_under_sampling)
return model, num_classes, mock_anchor_generator.num_anchors(), code_size
random_example_sampling=random_example_sampling,
weight_regression_loss_by_score=weight_regression_loss_by_score,
use_expected_classification_loss_under_sampling=
use_expected_classification_loss_under_sampling,
minimum_negative_sampling=minimum_negative_sampling,
desired_negative_sampling_ratio=desired_negative_sampling_ratio,
use_keras=use_keras,
predict_mask=predict_mask,
use_static_shapes=use_static_shapes,
nms_max_size_per_class=nms_max_size_per_class)
def test_preprocess_preserves_shapes_with_dynamic_input_image(
self, use_keras):
......@@ -360,6 +210,7 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertAllClose(detections_out['num_detections'],
expected_num_detections)
def test_loss_results_are_correct(self, use_keras):
with tf.Graph().as_default():
......@@ -374,9 +225,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
prediction_dict = model.predict(preprocessed_tensor,
true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return (
_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'),
_get_value_for_matching_key(loss_dict, 'Loss/classification_loss'))
return (self._get_value_for_matching_key(loss_dict,
'Loss/localization_loss'),
self._get_value_for_matching_key(loss_dict,
'Loss/classification_loss'))
batch_size = 2
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
......@@ -413,7 +265,8 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
prediction_dict = model.predict(preprocessed_tensor,
true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return (_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'),)
return (self._get_value_for_matching_key(loss_dict,
'Loss/localization_loss'),)
batch_size = 2
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
......@@ -443,9 +296,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
prediction_dict = model.predict(preprocessed_tensor,
true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return (
_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'),
_get_value_for_matching_key(loss_dict, 'Loss/classification_loss'))
return (self._get_value_for_matching_key(loss_dict,
'Loss/localization_loss'),
self._get_value_for_matching_key(loss_dict,
'Loss/classification_loss'))
batch_size = 2
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
......@@ -591,6 +445,55 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertAllClose(localization_loss, expected_localization_loss)
self.assertAllClose(classification_loss, expected_classification_loss)
def test_loss_results_are_correct_with_losses_mask(self, use_keras):
with tf.Graph().as_default():
_, num_classes, num_anchors, _ = self._create_model(use_keras=use_keras)
def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2,
groundtruth_boxes3, groundtruth_classes1, groundtruth_classes2,
groundtruth_classes3):
groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2,
groundtruth_boxes3]
groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2,
groundtruth_classes3]
is_annotated_list = [tf.constant(True), tf.constant(True),
tf.constant(False)]
model, _, _, _ = self._create_model(apply_hard_mining=False)
model.provide_groundtruth(groundtruth_boxes_list,
groundtruth_classes_list,
is_annotated_list=is_annotated_list)
prediction_dict = model.predict(preprocessed_tensor,
true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return (self._get_value_for_matching_key(loss_dict,
'Loss/localization_loss'),
self._get_value_for_matching_key(loss_dict,
'Loss/classification_loss'))
batch_size = 3
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_boxes3 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_classes1 = np.array([[1]], dtype=np.float32)
groundtruth_classes2 = np.array([[1]], dtype=np.float32)
groundtruth_classes3 = np.array([[1]], dtype=np.float32)
expected_localization_loss = 0.0
# Note that we are subtracting 1 from batch_size, since the final image is
# not annotated.
expected_classification_loss = ((batch_size - 1) * num_anchors
* (num_classes+1) * np.log(2.0))
(localization_loss,
classification_loss) = self.execute(graph_fn, [preprocessed_input,
groundtruth_boxes1,
groundtruth_boxes2,
groundtruth_boxes3,
groundtruth_classes1,
groundtruth_classes2,
groundtruth_classes3])
self.assertAllClose(localization_loss, expected_localization_loss)
self.assertAllClose(classification_loss, expected_classification_loss)
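For intuition on the expected value above: with sigmoid cross entropy, a logit of zero contributes log(2) per anchor-class pair regardless of the target, and the unannotated third image is masked out. The zero-logit behavior of the mock predictor is an assumption here; a sketch of the arithmetic:

import numpy as np

batch_size, num_anchors, num_classes = 3, 4, 1
per_pair = np.log(2.0)              # sigmoid cross entropy at a zero logit
annotated = batch_size - 1          # third image masked out via is_annotated
print(annotated * num_anchors * (num_classes + 1) * per_pair)  # ~11.09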
def test_restore_map_for_detection_ckpt(self, use_keras):
model, _, _, _ = self._create_model(use_keras=use_keras)
model.predict(tf.constant(np.array([[[[0, 0], [1, 1]], [[1, 0], [0, 1]]]],
......@@ -678,10 +581,8 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
use_keras):
with tf.Graph().as_default():
_, num_classes, num_anchors, _ = self._create_model(
random_example_sampling=True,
use_keras=use_keras)
print num_classes, num_anchors
_, num_classes, _, _ = self._create_model(
random_example_sampling=True, use_keras=use_keras)
def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2,
groundtruth_classes1, groundtruth_classes2):
......@@ -694,9 +595,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
prediction_dict = model.predict(
preprocessed_tensor, true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return (_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'),
_get_value_for_matching_key(loss_dict,
'Loss/classification_loss'))
return (self._get_value_for_matching_key(loss_dict,
'Loss/localization_loss'),
self._get_value_for_matching_key(loss_dict,
'Loss/classification_loss'))
batch_size = 2
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
......
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions for SSD models meta architecture tests."""
import functools
import tensorflow as tf
from object_detection.core import anchor_generator
from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import box_list
from object_detection.core import losses
from object_detection.core import post_processing
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import target_assigner
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.utils import ops
from object_detection.utils import test_case
from object_detection.utils import test_utils
slim = tf.contrib.slim
keras = tf.keras.layers
class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""Fake ssd feature extracture for ssd meta arch tests."""
def __init__(self):
super(FakeSSDFeatureExtractor, self).__init__(
is_training=True,
depth_multiplier=0,
min_depth=0,
pad_to_multiple=1,
conv_hyperparams_fn=None)
def preprocess(self, resized_inputs):
return tf.identity(resized_inputs)
def extract_features(self, preprocessed_inputs):
with tf.variable_scope('mock_model'):
features = slim.conv2d(
inputs=preprocessed_inputs,
num_outputs=32,
kernel_size=1,
scope='layer1')
return [features]
class FakeSSDKerasFeatureExtractor(ssd_meta_arch.SSDKerasFeatureExtractor):
"""Fake keras based ssd feature extracture for ssd meta arch tests."""
def __init__(self):
with tf.name_scope('mock_model'):
super(FakeSSDKerasFeatureExtractor, self).__init__(
is_training=True,
depth_multiplier=0,
min_depth=0,
pad_to_multiple=1,
conv_hyperparams=None,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
)
self._conv = keras.Conv2D(filters=32, kernel_size=1, name='layer1')
def preprocess(self, resized_inputs):
return tf.identity(resized_inputs)
def _extract_features(self, preprocessed_inputs, **kwargs):
with tf.name_scope('mock_model'):
return [self._conv(preprocessed_inputs)]
class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
"""A simple 2x2 anchor grid on the unit square used for test only."""
def name_scope(self):
return 'MockAnchorGenerator'
def num_anchors_per_location(self):
return [1]
def _generate(self, feature_map_shape_list, im_height, im_width):
return [
box_list.BoxList(
tf.constant(
[
[0, 0, .5, .5],
[0, .5, .5, 1],
[.5, 0, 1, .5],
[1., 1., 1.5, 1.5] # Anchor that is outside clip_window.
],
tf.float32))
]
def num_anchors(self):
return 4
class SSDMetaArchTestBase(test_case.TestCase):
"""Base class to test SSD based meta architectures."""
def _create_model(self,
model_fn=ssd_meta_arch.SSDMetaArch,
apply_hard_mining=True,
normalize_loc_loss_by_codesize=False,
add_background_class=True,
random_example_sampling=False,
weight_regression_loss_by_score=False,
use_expected_classification_loss_under_sampling=False,
minimum_negative_sampling=1,
desired_negative_sampling_ratio=3,
use_keras=False,
predict_mask=False,
use_static_shapes=False,
nms_max_size_per_class=5):
is_training = False
num_classes = 1
mock_anchor_generator = MockAnchorGenerator2x2()
if use_keras:
mock_box_predictor = test_utils.MockKerasBoxPredictor(
is_training, num_classes, predict_mask=predict_mask)
else:
mock_box_predictor = test_utils.MockBoxPredictor(
is_training, num_classes, predict_mask=predict_mask)
mock_box_coder = test_utils.MockBoxCoder()
if use_keras:
fake_feature_extractor = FakeSSDKerasFeatureExtractor()
else:
fake_feature_extractor = FakeSSDFeatureExtractor()
mock_matcher = test_utils.MockMatcher()
region_similarity_calculator = sim_calc.IouSimilarity()
encode_background_as_zeros = False
def image_resizer_fn(image):
return [tf.identity(image), tf.shape(image)]
classification_loss = losses.WeightedSigmoidClassificationLoss()
localization_loss = losses.WeightedSmoothL1LocalizationLoss()
non_max_suppression_fn = functools.partial(
post_processing.batch_multiclass_non_max_suppression,
score_thresh=-20.0,
iou_thresh=1.0,
max_size_per_class=nms_max_size_per_class,
max_total_size=nms_max_size_per_class,
use_static_shapes=use_static_shapes)
classification_loss_weight = 1.0
localization_loss_weight = 1.0
negative_class_weight = 1.0
normalize_loss_by_num_matches = False
hard_example_miner = None
if apply_hard_mining:
# This hard example miner is expected to be a no-op.
hard_example_miner = losses.HardExampleMiner(
num_hard_examples=None, iou_threshold=1.0)
random_example_sampler = None
if random_example_sampling:
random_example_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=0.5)
target_assigner_instance = target_assigner.TargetAssigner(
region_similarity_calculator,
mock_matcher,
mock_box_coder,
negative_class_weight=negative_class_weight,
weight_regression_loss_by_score=weight_regression_loss_by_score)
expected_classification_loss_under_sampling = None
if use_expected_classification_loss_under_sampling:
expected_classification_loss_under_sampling = functools.partial(
ops.expected_classification_loss_under_sampling,
minimum_negative_sampling=minimum_negative_sampling,
desired_negative_sampling_ratio=desired_negative_sampling_ratio)
code_size = 4
model = model_fn(
is_training=is_training,
anchor_generator=mock_anchor_generator,
box_predictor=mock_box_predictor,
box_coder=mock_box_coder,
feature_extractor=fake_feature_extractor,
encode_background_as_zeros=encode_background_as_zeros,
image_resizer_fn=image_resizer_fn,
non_max_suppression_fn=non_max_suppression_fn,
score_conversion_fn=tf.identity,
classification_loss=classification_loss,
localization_loss=localization_loss,
classification_loss_weight=classification_loss_weight,
localization_loss_weight=localization_loss_weight,
normalize_loss_by_num_matches=normalize_loss_by_num_matches,
hard_example_miner=hard_example_miner,
target_assigner_instance=target_assigner_instance,
add_summaries=False,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
add_background_class=add_background_class,
random_example_sampler=random_example_sampler,
expected_classification_loss_under_sampling=
expected_classification_loss_under_sampling)
return model, num_classes, mock_anchor_generator.num_anchors(), code_size
def _get_value_for_matching_key(self, dictionary, suffix):
for key in dictionary.keys():
if key.endswith(suffix):
return dictionary[key]
raise ValueError('key not found {}'.format(suffix))
if __name__ == '__main__':
tf.test.main()
......@@ -18,6 +18,7 @@ import tensorflow as tf
from object_detection.core import standard_fields
from object_detection.metrics import coco_tools
from object_detection.utils import json_utils
from object_detection.utils import object_detection_evaluation
......@@ -148,6 +149,19 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_classes]))
self._image_ids[image_id] = True
def dump_detections_to_json_file(self, json_output_path):
"""Saves the detections into json_output_path in the format used by MS COCO.
Args:
json_output_path: String containing the output file's path. It can also be
None, in which case nothing is written to the output file.
"""
if json_output_path:
with tf.gfile.GFile(json_output_path, 'w') as fid:
tf.logging.info('Dumping detections to output json file.')
json_utils.Dump(
obj=self._detection_boxes_list, fid=fid, float_digits=4, indent=2)
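# Illustrative usage sketch: assuming groundtruth and detections have already
# been added via add_single_ground_truth_image_info and
# add_single_detected_image_info, the accumulated detections can be written
# out in COCO format. The output path below is a hypothetical placeholder.
#
#   evaluator = CocoDetectionEvaluator(categories)
#   evaluator.add_single_ground_truth_image_info('image1', groundtruth_dict)
#   evaluator.add_single_detected_image_info('image1', detections_dict)
#   evaluator.dump_detections_to_json_file('/tmp/detections.json')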
def evaluate(self):
"""Evaluates the detection boxes and returns a dictionary of coco metrics.
......@@ -245,10 +259,11 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_boxes_batched, detection_scores_batched,
detection_classes_batched, num_det_boxes_per_image):
self.add_single_ground_truth_image_info(
image_id,
{'groundtruth_boxes': gt_box[:num_gt_box],
'groundtruth_classes': gt_class[:num_gt_box],
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]})
image_id, {
'groundtruth_boxes': gt_box[:num_gt_box],
'groundtruth_classes': gt_class[:num_gt_box],
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
})
self.add_single_detected_image_info(
image_id,
{'detection_boxes': det_box[:num_det_box],
......@@ -268,8 +283,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_classes = eval_dict[detection_fields.detection_classes]
num_gt_boxes_per_image = eval_dict.get(
'num_groundtruth_boxes_per_image', None)
num_det_boxes_per_image = eval_dict.get(
'num_groundtruth_boxes_per_image', None)
num_det_boxes_per_image = eval_dict.get('num_det_boxes_per_image', None)
if groundtruth_is_crowd is None:
groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
......@@ -491,6 +505,19 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_classes]))
self._image_ids_with_detections.update([image_id])
def dump_detections_to_json_file(self, json_output_path):
"""Saves the detections into json_output_path in the format used by MS COCO.
Args:
json_output_path: String containing the output file's path. It can also be
None, in which case nothing is written to the output file.
"""
if json_output_path:
tf.logging.info('Dumping detections to output json file.')
with tf.gfile.GFile(json_output_path, 'w') as fid:
json_utils.Dump(
obj=self._detection_masks_list, fid=fid, float_digits=4, indent=2)
def evaluate(self):
"""Evaluates the detection masks and returns a dictionary of coco metrics.
......
......@@ -24,14 +24,25 @@ from object_detection.core import standard_fields
from object_detection.metrics import coco_evaluation
def _get_categories_list():
return [{
'id': 1,
'name': 'person'
}, {
'id': 2,
'name': 'dog'
}, {
'id': 3,
'name': 'cat'
}]
class CocoDetectionEvaluationTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
"""Tests that mAP is calculated correctly on GT and Detections."""
category_list = [{'id': 0, 'name': 'person'},
{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
coco_evaluator.add_single_ground_truth_image_info(
image_id='image1',
groundtruth_dict={
......@@ -88,17 +99,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetectionsSkipCrowd(self):
"""Tests computing mAP with is_crowd GT boxes skipped."""
category_list = [{
'id': 0,
'name': 'person'
}, {
'id': 1,
'name': 'cat'
}, {
'id': 2,
'name': 'dog'
}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
coco_evaluator.add_single_ground_truth_image_info(
image_id='image1',
groundtruth_dict={
......@@ -124,17 +126,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetectionsEmptyCrowd(self):
"""Tests computing mAP with empty is_crowd array passed in."""
category_list = [{
'id': 0,
'name': 'person'
}, {
'id': 1,
'name': 'cat'
}, {
'id': 2,
'name': 'dog'
}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
coco_evaluator.add_single_ground_truth_image_info(
image_id='image1',
groundtruth_dict={
......@@ -160,11 +153,9 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
def testRejectionOnDuplicateGroundtruth(self):
"""Tests that groundtruth cannot be added more than once for an image."""
categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
# Add groundtruth
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories)
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
......@@ -189,11 +180,9 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
def testRejectionOnDuplicateDetections(self):
"""Tests that detections cannot be added more than once for an image."""
categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
# Add groundtruth
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories)
coco_evaluator.add_single_ground_truth_image_info(
image_id='image1',
groundtruth_dict={
......@@ -227,10 +216,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
def testExceptionRaisedWithMissingGroundtruth(self):
"""Tests that exception is raised for detection with missing groundtruth."""
categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories)
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
with self.assertRaises(ValueError):
coco_evaluator.add_single_detected_image_info(
image_id='image1',
......@@ -247,10 +234,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
class CocoEvaluationPyFuncTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
category_list = [{'id': 0, 'name': 'person'},
{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
image_id = tf.placeholder(tf.string, shape=())
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
......@@ -310,31 +295,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-1.0)
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-1.0)
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self):
category_list = [{
'id': 0,
'name': 'person'
}, {
'id': 1,
'name': 'cat'
}, {
'id': 2,
'name': 'dog'
}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
image_id = tf.placeholder(tf.string, shape=())
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
......@@ -415,24 +391,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-1.0)
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.83333331)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-1.0)
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
category_list = [{'id': 0, 'name': 'person'},
{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
batch_size = 3
image_id = tf.placeholder(tf.string, shape=(batch_size))
groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
......@@ -479,24 +453,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-1.0)
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-1.0)
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsPaddedBatches(self):
category_list = [{'id': 0, 'name': 'person'},
{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
batch_size = 3
image_id = tf.placeholder(tf.string, shape=(batch_size))
groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
......@@ -525,27 +497,40 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
_, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
with self.test_session() as sess:
sess.run(update_op,
feed_dict={
image_id: ['image1', 'image2', 'image3'],
groundtruth_boxes: np.array([[[100., 100., 200., 200.],
[-1, -1, -1, -1]],
[[50., 50., 100., 100.],
[-1, -1, -1, -1]],
[[25., 25., 50., 50.],
[10., 10., 15., 15.]]]),
groundtruth_classes: np.array([[1, -1], [3, -1], [2, 2]]),
num_gt_boxes_per_image: np.array([1, 1, 2]),
detection_boxes: np.array([[[100., 100., 200., 200.],
[0., 0., 0., 0.]],
[[50., 50., 100., 100.],
[0., 0., 0., 0.]],
[[25., 25., 50., 50.],
[10., 10., 15., 15.]]]),
detection_scores: np.array([[.8, 0.], [.7, 0.], [.95, .9]]),
detection_classes: np.array([[1, -1], [3, -1], [2, 2]]),
num_det_boxes_per_image: np.array([1, 1, 2]),
})
sess.run(
update_op,
feed_dict={
image_id: ['image1', 'image2', 'image3'],
groundtruth_boxes:
np.array([[[100., 100., 200., 200.], [-1, -1, -1, -1]],
[[50., 50., 100., 100.], [-1, -1, -1, -1]],
[[25., 25., 50., 50.], [10., 10., 15., 15.]]]),
groundtruth_classes:
np.array([[1, -1], [3, -1], [2, 2]]),
num_gt_boxes_per_image:
np.array([1, 1, 2]),
detection_boxes:
np.array([[[100., 100., 200., 200.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
[[50., 50., 100., 100.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
[[25., 25., 50., 50.],
[10., 10., 15., 15.],
[10., 10., 15., 15.]]]),
detection_scores:
np.array([[.8, 0., 0.], [.7, 0., 0.], [.95, .9, 0.9]]),
detection_classes:
np.array([[1, -1, -1], [3, -1, -1], [2, 2, 2]]),
num_det_boxes_per_image:
np.array([1, 1, 3]),
})
# Check the number of bounding boxes added.
self.assertEqual(len(coco_evaluator._groundtruth_list), 4)
self.assertEqual(len(coco_evaluator._detection_boxes_list), 5)
metrics = {}
for key, (value_op, _) in eval_metric_ops.iteritems():
metrics[key] = value_op
......@@ -555,14 +540,14 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-1.0)
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.83333331)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-1.0)
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list)
......@@ -572,10 +557,7 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
class CocoMaskEvaluationTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
category_list = [{'id': 0, 'name': 'person'},
{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoMaskEvaluator(category_list)
coco_evaluator = coco_evaluation.CocoMaskEvaluator(_get_categories_list())
coco_evaluator.add_single_ground_truth_image_info(
image_id='image1',
groundtruth_dict={
......@@ -657,10 +639,7 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
category_list = [{'id': 0, 'name': 'person'},
{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoMaskEvaluator(category_list)
coco_evaluator = coco_evaluation.CocoMaskEvaluator(_get_categories_list())
image_id = tf.placeholder(tf.string, shape=())
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
......@@ -756,5 +735,6 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._image_id_to_mask_shape_map)
self.assertFalse(coco_evaluator._detection_masks_list)
if __name__ == '__main__':
tf.test.main()
......@@ -91,10 +91,8 @@ def read_data_and_evaluate(input_config, eval_config):
if input_config.WhichOneof('input_reader') == 'tf_record_input_reader':
input_paths = input_config.tf_record_input_reader.input_path
label_map = label_map_util.load_labelmap(input_config.label_map_path)
max_num_classes = max([item.id for item in label_map.item])
categories = label_map_util.convert_label_map_to_categories(
label_map, max_num_classes)
categories = label_map_util.create_categories_from_labelmap(
input_config.label_map_path)
object_detection_evaluators = evaluator.get_evaluators(
eval_config, categories)
......
......@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import functools
import os
......@@ -43,9 +44,12 @@ MODEL_BUILD_UTIL_MAP = {
config_util.create_pipeline_proto_from_configs,
'merge_external_params_with_configs':
config_util.merge_external_params_with_configs,
'create_train_input_fn': inputs.create_train_input_fn,
'create_eval_input_fn': inputs.create_eval_input_fn,
'create_predict_input_fn': inputs.create_predict_input_fn,
'create_train_input_fn':
inputs.create_train_input_fn,
'create_eval_input_fn':
inputs.create_eval_input_fn,
'create_predict_input_fn':
inputs.create_predict_input_fn,
}
......@@ -126,8 +130,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
ValueError: If unpad_tensors is True and `tensor_dict` does not contain
`num_groundtruth_boxes` tensor.
"""
unbatched_tensor_dict = {key: tf.unstack(tensor)
for key, tensor in tensor_dict.items()}
unbatched_tensor_dict = {
key: tf.unstack(tensor) for key, tensor in tensor_dict.items()
}
if unpad_groundtruth_tensors:
if (fields.InputDataFields.num_groundtruth_boxes not in
unbatched_tensor_dict):
......@@ -206,8 +211,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
# Make sure to set the Keras learning phase. True during training,
# False for inference.
tf.keras.backend.set_learning_phase(is_training)
detection_model = detection_model_fn(is_training=is_training,
add_summaries=(not use_tpu))
detection_model = detection_model_fn(
is_training=is_training, add_summaries=(not use_tpu))
scaffold_fn = None
if mode == tf.estimator.ModeKeys.TRAIN:
......@@ -237,6 +242,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
gt_weights_list = None
if fields.InputDataFields.groundtruth_weights in labels:
gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
gt_is_crowd_list = None
if fields.InputDataFields.groundtruth_is_crowd in labels:
gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
detection_model.provide_groundtruth(
......@@ -248,8 +254,18 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
groundtruth_is_crowd_list=gt_is_crowd_list)
preprocessed_images = features[fields.InputDataFields.image]
prediction_dict = detection_model.predict(
preprocessed_images, features[fields.InputDataFields.true_image_shape])
if use_tpu and train_config.use_bfloat16:
with tf.contrib.tpu.bfloat16_scope():
prediction_dict = detection_model.predict(
preprocessed_images,
features[fields.InputDataFields.true_image_shape])
for k, v in prediction_dict.items():
if v.dtype == tf.bfloat16:
prediction_dict[k] = tf.cast(v, tf.float32)
else:
prediction_dict = detection_model.predict(
preprocessed_images,
features[fields.InputDataFields.true_image_shape])
if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
detections = detection_model.postprocess(
prediction_dict, features[fields.InputDataFields.true_image_shape])
......@@ -270,13 +286,16 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
train_config.load_all_detection_checkpoint_vars))
available_var_map = (
variables_helper.get_variables_available_in_checkpoint(
asg_map, train_config.fine_tune_checkpoint,
asg_map,
train_config.fine_tune_checkpoint,
include_global_step=False))
if use_tpu:
def tpu_scaffold():
tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
available_var_map)
return tf.train.Scaffold()
scaffold_fn = tpu_scaffold
else:
tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
......@@ -290,8 +309,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
regularization_losses = tf.get_collection(
tf.GraphKeys.REGULARIZATION_LOSSES)
if regularization_losses:
regularization_loss = tf.add_n(regularization_losses,
name='regularization_loss')
regularization_loss = tf.add_n(
regularization_losses, name='regularization_loss')
losses.append(regularization_loss)
losses_dict['Loss/regularization_loss'] = regularization_loss
total_loss = tf.add_n(losses, name='total_loss')
......@@ -353,14 +372,19 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
eval_metric_ops = None
scaffold = None
if mode == tf.estimator.ModeKeys.EVAL:
class_agnostic = (fields.DetectionResultFields.detection_classes
not in detections)
groundtruth = _prepare_groundtruth_for_eval(
detection_model, class_agnostic)
class_agnostic = (
fields.DetectionResultFields.detection_classes not in detections)
groundtruth = _prepare_groundtruth_for_eval(detection_model,
class_agnostic)
use_original_images = fields.InputDataFields.original_image in features
eval_images = (
features[fields.InputDataFields.original_image] if use_original_images
else features[fields.InputDataFields.image])
if use_original_images:
eval_images = tf.cast(tf.image.resize_bilinear(
features[fields.InputDataFields.original_image][0:1],
features[fields.InputDataFields.original_image_spatial_shape][0]),
tf.uint8)
else:
eval_images = features[fields.InputDataFields.image]
eval_dict = eval_util.result_dict_for_single_example(
eval_images[0:1],
features[inputs.HASH_KEY][0],
......@@ -374,28 +398,26 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
else:
category_index = label_map_util.create_category_index_from_labelmap(
eval_input_config.label_map_path)
img_summary = None
vis_metric_ops = None
if not use_tpu and use_original_images:
detection_and_groundtruth = (
vis_utils.draw_side_by_side_evaluation_image(
eval_dict, category_index, max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
min_score_thresh=eval_config.min_score_threshold,
use_normalized_coordinates=False))
img_summary = tf.summary.image('Detections_Left_Groundtruth_Right',
detection_and_groundtruth)
eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
category_index,
max_examples_to_draw=eval_config.num_visualizations,
max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
min_score_thresh=eval_config.min_score_threshold,
use_normalized_coordinates=False)
vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(
eval_dict)
# Eval metrics on a single example.
eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_config,
category_index.values(),
eval_dict)
eval_config, category_index.values(), eval_dict)
for loss_key, loss_tensor in iter(losses_dict.items()):
eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
for var in optimizer_summary_vars:
eval_metric_ops[var.op.name] = (var, tf.no_op())
if img_summary is not None:
eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
img_summary, tf.no_op())
if vis_metric_ops is not None:
eval_metric_ops.update(vis_metric_ops)
eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}
if eval_config.use_moving_averages:
......@@ -435,12 +457,14 @@ def create_estimator_and_inputs(run_config,
hparams,
pipeline_config_path,
train_steps=None,
eval_steps=None,
sample_1_of_n_eval_examples=1,
sample_1_of_n_eval_on_train_examples=1,
model_fn_creator=create_model_fn,
use_tpu_estimator=False,
use_tpu=False,
num_shards=1,
params=None,
override_eval_num_epochs=True,
**kwargs):
"""Creates `Estimator`, input functions, and steps.
......@@ -450,8 +474,11 @@ def create_estimator_and_inputs(run_config,
pipeline_config_path: A path to a pipeline config file.
train_steps: Number of training steps. If None, the number of training steps
is set from the `TrainConfig` proto.
eval_steps: Number of evaluation steps per evaluation cycle. If None, the
number of evaluation steps is set from the `EvalConfig` proto.
sample_1_of_n_eval_examples: Integer representing how often an eval example
should be sampled. If 1, will sample all examples.
sample_1_of_n_eval_on_train_examples: Similar to
`sample_1_of_n_eval_examples`, except controls the sampling of training
data for evaluation.
model_fn_creator: A function that creates a `model_fn` for `Estimator`.
Follows the signature:
......@@ -470,19 +497,20 @@ def create_estimator_and_inputs(run_config,
is True.
params: Parameter dictionary passed from the estimator. Only used if
`use_tpu_estimator` is True.
override_eval_num_epochs: Whether to overwrite the number of epochs to
1 for eval_input.
**kwargs: Additional keyword arguments for configuration override.
Returns:
A dictionary with the following fields:
'estimator': An `Estimator` or `TPUEstimator`.
'train_input_fn': A training input function.
'eval_input_fn': An evaluation input function.
'eval_input_fns': A list of all evaluation input functions.
'eval_input_names': A list of names for each evaluation input.
'eval_on_train_input_fn': An evaluation-on-train input function.
'predict_input_fn': A prediction input function.
'train_steps': Number of training steps. Either directly from input or from
configuration.
'eval_steps': Number of evaluation steps. Either directly from input or from
configuration.
"""
get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
'get_configs_from_pipeline_file']
......@@ -495,27 +523,36 @@ def create_estimator_and_inputs(run_config,
create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn']
configs = get_configs_from_pipeline_file(pipeline_config_path)
kwargs.update({
'train_steps': train_steps,
'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
})
if override_eval_num_epochs:
kwargs.update({'eval_num_epochs': 1})
tf.logging.warning(
'Forced number of epochs for all eval validations to be 1.')
configs = merge_external_params_with_configs(
configs,
hparams,
train_steps=train_steps,
eval_steps=eval_steps,
retain_original_images_in_eval=False if use_tpu else True,
**kwargs)
configs, hparams, kwargs_dict=kwargs)
model_config = configs['model']
train_config = configs['train_config']
train_input_config = configs['train_input_config']
eval_config = configs['eval_config']
eval_input_config = configs['eval_input_config']
eval_input_configs = configs['eval_input_configs']
eval_on_train_input_config = copy.deepcopy(train_input_config)
eval_on_train_input_config.sample_1_of_n_examples = (
sample_1_of_n_eval_on_train_examples)
if override_eval_num_epochs and eval_on_train_input_config.num_epochs != 1:
tf.logging.warning('Expected number of evaluation epochs is 1, but '
'instead encountered `eval_on_train_input_config'
'.num_epochs` = '
'{}. Overwriting `num_epochs` to 1.'.format(
eval_on_train_input_config.num_epochs))
eval_on_train_input_config.num_epochs = 1
# update train_steps from config but only when non-zero value is provided
if train_steps is None and train_config.num_steps != 0:
train_steps = train_config.num_steps
# update eval_steps from config but only when non-zero value is provided
if eval_steps is None and eval_config.num_examples != 0:
eval_steps = eval_config.num_examples
detection_model_fn = functools.partial(
model_builder.build, model_config=model_config)
......@@ -524,18 +561,25 @@ def create_estimator_and_inputs(run_config,
train_config=train_config,
train_input_config=train_input_config,
model_config=model_config)
eval_input_fn = create_eval_input_fn(
eval_config=eval_config,
eval_input_config=eval_input_config,
model_config=model_config)
eval_input_fns = [
create_eval_input_fn(
eval_config=eval_config,
eval_input_config=eval_input_config,
model_config=model_config) for eval_input_config in eval_input_configs
]
eval_input_names = [
eval_input_config.name for eval_input_config in eval_input_configs
]
eval_on_train_input_fn = create_eval_input_fn(
eval_config=eval_config,
eval_input_config=train_input_config,
eval_input_config=eval_on_train_input_config,
model_config=model_config)
predict_input_fn = create_predict_input_fn(
model_config=model_config, predict_input_config=eval_input_config)
model_config=model_config, predict_input_config=eval_input_configs[0])
tf.logging.info('create_estimator_and_inputs: use_tpu %s', use_tpu)
export_to_tpu = hparams.get('export_to_tpu', False)
tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
use_tpu, export_to_tpu)
model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu)
if use_tpu_estimator:
estimator = tf.contrib.tpu.TPUEstimator(
......@@ -552,89 +596,85 @@ def create_estimator_and_inputs(run_config,
# Write the as-run pipeline config to disk.
if run_config.is_chief:
pipeline_config_final = create_pipeline_proto_from_configs(
configs)
pipeline_config_final = create_pipeline_proto_from_configs(configs)
config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir)
return dict(
estimator=estimator,
train_input_fn=train_input_fn,
eval_input_fn=eval_input_fn,
eval_input_fns=eval_input_fns,
eval_input_names=eval_input_names,
eval_on_train_input_fn=eval_on_train_input_fn,
predict_input_fn=predict_input_fn,
train_steps=train_steps,
eval_steps=eval_steps)
train_steps=train_steps)
def create_train_and_eval_specs(train_input_fn,
eval_input_fn,
eval_input_fns,
eval_on_train_input_fn,
predict_input_fn,
train_steps,
eval_steps,
eval_on_train_data=False,
eval_on_train_steps=None,
final_exporter_name='Servo',
eval_spec_name='eval'):
eval_spec_names=None):
"""Creates a `TrainSpec` and `EvalSpec`s.
Args:
train_input_fn: Function that produces features and labels on train data.
eval_input_fn: Function that produces features and labels on eval data.
eval_input_fns: A list of functions that produce features and labels on eval
data.
eval_on_train_input_fn: Function that produces features and labels for
evaluation on train data.
predict_input_fn: Function that produces features for inference.
train_steps: Number of training steps.
eval_steps: Number of eval steps.
eval_on_train_data: Whether to evaluate model on training data. Default is
False.
eval_on_train_steps: Number of eval steps for training data. If not given,
uses eval_steps.
final_exporter_name: String name given to `FinalExporter`.
eval_spec_name: String name given to main `EvalSpec`.
eval_spec_names: A list of string names for each `EvalSpec`.
Returns:
Tuple of `TrainSpec` and list of `EvalSpecs`. The first `EvalSpec` is for
evaluation data. If `eval_on_train_data` is True, the second `EvalSpec` in
the list will correspond to training data.
Tuple of `TrainSpec` and list of `EvalSpecs`. If `eval_on_train_data` is
True, the last `EvalSpec` in the list will correspond to training data. The
rest of the `EvalSpec`s in the list correspond to the evaluation datasets.
"""
exporter = tf.estimator.FinalExporter(
name=final_exporter_name, serving_input_receiver_fn=predict_input_fn)
train_spec = tf.estimator.TrainSpec(
input_fn=train_input_fn, max_steps=train_steps)
eval_specs = [
tf.estimator.EvalSpec(
name=eval_spec_name,
input_fn=eval_input_fn,
steps=eval_steps,
exporters=exporter)
]
if eval_spec_names is None:
eval_spec_names = range(len(eval_input_fns))
eval_specs = []
for eval_spec_name, eval_input_fn in zip(eval_spec_names, eval_input_fns):
exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
exporter = tf.estimator.FinalExporter(
name=exporter_name, serving_input_receiver_fn=predict_input_fn)
eval_specs.append(
tf.estimator.EvalSpec(
name=eval_spec_name,
input_fn=eval_input_fn,
steps=None,
exporters=exporter))
if eval_on_train_data:
eval_specs.append(
tf.estimator.EvalSpec(
name='eval_on_train', input_fn=eval_on_train_input_fn,
steps=eval_on_train_steps or eval_steps))
name='eval_on_train', input_fn=eval_on_train_input_fn, steps=None))
return train_spec, eval_specs
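# Illustrative usage sketch: wiring the outputs of create_estimator_and_inputs
# into create_train_and_eval_specs and tf.estimator.train_and_evaluate. The
# pipeline config path is a hypothetical placeholder, and only the first
# EvalSpec is passed on, mirroring the single-eval restriction in
# model_main.py.
#
#   train_and_eval_dict = create_estimator_and_inputs(
#       run_config, hparams, '/path/to/pipeline.config')
#   train_spec, eval_specs = create_train_and_eval_specs(
#       train_and_eval_dict['train_input_fn'],
#       train_and_eval_dict['eval_input_fns'],
#       train_and_eval_dict['eval_on_train_input_fn'],
#       train_and_eval_dict['predict_input_fn'],
#       train_and_eval_dict['train_steps'],
#       eval_on_train_data=False)
#   tf.estimator.train_and_evaluate(
#       train_and_eval_dict['estimator'], train_spec, eval_specs[0])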
def continuous_eval(estimator, model_dir, input_fn, eval_steps, train_steps,
name):
def continuous_eval(estimator, model_dir, input_fn, train_steps, name):
"""Perform continuous evaluation on checkpoints written to a model directory.
Args:
estimator: Estimator object to use for evaluation.
model_dir: Model directory to read checkpoints for continuous evaluation.
input_fn: Input function to use for evaluation.
eval_steps: Number of steps to run during each evaluation.
train_steps: Number of training steps. This is used to infer the last
checkpoint and stop evaluation loop.
name: Namescope for eval summary.
"""
def terminate_eval():
tf.logging.info('Terminating eval after 180 seconds of no checkpoints')
return True
......@@ -646,10 +686,7 @@ def continuous_eval(estimator, model_dir, input_fn, eval_steps, train_steps,
tf.logging.info('Starting Evaluation.')
try:
eval_results = estimator.evaluate(
input_fn=input_fn,
steps=eval_steps,
checkpoint_path=ckpt,
name=name)
input_fn=input_fn, steps=None, checkpoint_path=ckpt, name=name)
tf.logging.info('Eval results: %s' % eval_results)
# Terminate eval job when final checkpoint is reached
......@@ -713,10 +750,9 @@ def populate_experiment(run_config,
**kwargs)
estimator = train_and_eval_dict['estimator']
train_input_fn = train_and_eval_dict['train_input_fn']
eval_input_fn = train_and_eval_dict['eval_input_fn']
eval_input_fns = train_and_eval_dict['eval_input_fns']
predict_input_fn = train_and_eval_dict['predict_input_fn']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
export_strategies = [
tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
......@@ -726,8 +762,9 @@ def populate_experiment(run_config,
return tf.contrib.learn.Experiment(
estimator=estimator,
train_input_fn=train_input_fn,
eval_input_fn=eval_input_fn,
eval_input_fn=eval_input_fns[0],
train_steps=train_steps,
eval_steps=eval_steps,
eval_steps=None,
export_strategies=export_strategies,
eval_delay_secs=120,)
eval_delay_secs=120,
)
......@@ -64,11 +64,13 @@ def _get_configs_for_model(model_name):
data_path = _get_data_path()
label_map_path = _get_labelmap_path()
configs = config_util.get_configs_from_pipeline_file(filename)
override_dict = {
'train_input_path': data_path,
'eval_input_path': data_path,
'label_map_path': label_map_path
}
configs = config_util.merge_external_params_with_configs(
configs,
train_input_path=data_path,
eval_input_path=data_path,
label_map_path=label_map_path)
configs, kwargs_dict=override_dict)
return configs
......@@ -145,6 +147,9 @@ class ModelLibTest(tf.test.TestCase):
self.assertEqual(batch_size, detection_scores.shape.as_list()[0])
self.assertEqual(tf.float32, detection_scores.dtype)
self.assertEqual(tf.float32, num_detections.dtype)
if mode == 'eval':
self.assertIn('Detections_Left_Groundtruth_Right/0',
estimator_spec.eval_metric_ops)
if model_mode == tf.estimator.ModeKeys.TRAIN:
self.assertIsNotNone(estimator_spec.train_op)
return estimator_spec
......@@ -225,21 +230,17 @@ class ModelLibTest(tf.test.TestCase):
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20
eval_steps = 10
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config,
hparams,
pipeline_config_path,
train_steps=train_steps,
eval_steps=eval_steps)
train_steps=train_steps)
estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tf.estimator.Estimator)
self.assertEqual(20, train_steps)
self.assertEqual(10, eval_steps)
self.assertIn('train_input_fn', train_and_eval_dict)
self.assertIn('eval_input_fn', train_and_eval_dict)
self.assertIn('eval_input_fns', train_and_eval_dict)
self.assertIn('eval_on_train_input_fn', train_and_eval_dict)
def test_create_estimator_with_default_train_eval_steps(self):
......@@ -250,16 +251,13 @@ class ModelLibTest(tf.test.TestCase):
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
config_train_steps = configs['train_config'].num_steps
config_eval_steps = configs['eval_config'].num_examples
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config, hparams, pipeline_config_path)
estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tf.estimator.Estimator)
self.assertEqual(config_train_steps, train_steps)
self.assertEqual(config_eval_steps, eval_steps)
def test_create_tpu_estimator_and_inputs(self):
"""Tests that number of train/eval defaults to config values."""
......@@ -269,21 +267,17 @@ class ModelLibTest(tf.test.TestCase):
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20
eval_steps = 10
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config,
hparams,
pipeline_config_path,
train_steps=train_steps,
eval_steps=eval_steps,
use_tpu_estimator=True)
estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tpu_estimator.TPUEstimator)
self.assertEqual(20, train_steps)
self.assertEqual(10, eval_steps)
def test_create_train_and_eval_specs(self):
"""Tests that `TrainSpec` and `EvalSpec` is created correctly."""
......@@ -292,38 +286,32 @@ class ModelLibTest(tf.test.TestCase):
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20
eval_steps = 10
eval_on_train_steps = 15
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config,
hparams,
pipeline_config_path,
train_steps=train_steps,
eval_steps=eval_steps)
train_steps=train_steps)
train_input_fn = train_and_eval_dict['train_input_fn']
eval_input_fn = train_and_eval_dict['eval_input_fn']
eval_input_fns = train_and_eval_dict['eval_input_fns']
eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
predict_input_fn = train_and_eval_dict['predict_input_fn']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
train_spec, eval_specs = model_lib.create_train_and_eval_specs(
train_input_fn,
eval_input_fn,
eval_input_fns,
eval_on_train_input_fn,
predict_input_fn,
train_steps,
eval_steps,
eval_on_train_data=True,
eval_on_train_steps=eval_on_train_steps,
final_exporter_name='exporter',
eval_spec_name='holdout')
eval_spec_names=['holdout'])
self.assertEqual(train_steps, train_spec.max_steps)
self.assertEqual(2, len(eval_specs))
self.assertEqual(eval_steps, eval_specs[0].steps)
self.assertEqual(None, eval_specs[0].steps)
self.assertEqual('holdout', eval_specs[0].name)
self.assertEqual('exporter', eval_specs[0].exporters[0].name)
self.assertEqual(eval_on_train_steps, eval_specs[1].steps)
self.assertEqual('exporter_holdout', eval_specs[0].exporters[0].name)
self.assertEqual(None, eval_specs[1].steps)
self.assertEqual('eval_on_train', eval_specs[1].name)
def test_experiment(self):
......@@ -339,7 +327,7 @@ class ModelLibTest(tf.test.TestCase):
train_steps=10,
eval_steps=20)
self.assertEqual(10, experiment.train_steps)
self.assertEqual(20, experiment.eval_steps)
self.assertEqual(None, experiment.eval_steps)
class UnbatchTensorsTest(tf.test.TestCase):
......
......@@ -31,7 +31,16 @@ flags.DEFINE_string(
flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
'file.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_integer('num_eval_steps', None, 'Number of eval steps.')
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job. Note '
'that one can only use this in eval-only mode, and '
'`checkpoint_dir` must be supplied.')
flags.DEFINE_integer('sample_1_of_n_eval_examples', 1, 'Will sample one of '
'every n eval input examples, where n is provided.')
flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
'one of every n train input examples for evaluation, '
'where n is provided. This is only used if '
'`eval_training_data` is True.')
flags.DEFINE_string(
'hparams_overrides', None, 'Hyperparameter overrides, '
'represented as a string containing comma-separated '
......@@ -44,8 +53,6 @@ flags.DEFINE_boolean(
'run_once', False, 'If running in eval-only mode, whether to run just '
'one round of eval vs running continuously (default).'
)
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
FLAGS = flags.FLAGS
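# Illustrative invocation sketch exercising the new sampling flags; all paths
# are placeholders, and --model_dir is assumed to be the output-directory flag
# defined earlier in this file.
#
#   python model_main.py \
#     --pipeline_config_path=/path/to/pipeline.config \
#     --model_dir=/path/to/model_dir \
#     --num_train_steps=50000 \
#     --sample_1_of_n_eval_examples=1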
......@@ -59,14 +66,15 @@ def main(unused_argv):
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps,
eval_steps=FLAGS.num_eval_steps)
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
sample_1_of_n_eval_on_train_examples=(
FLAGS.sample_1_of_n_eval_on_train_examples))
estimator = train_and_eval_dict['estimator']
train_input_fn = train_and_eval_dict['train_input_fn']
eval_input_fn = train_and_eval_dict['eval_input_fn']
eval_input_fns = train_and_eval_dict['eval_input_fns']
eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
predict_input_fn = train_and_eval_dict['predict_input_fn']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
if FLAGS.checkpoint_dir:
if FLAGS.eval_training_data:
......@@ -74,23 +82,23 @@ def main(unused_argv):
input_fn = eval_on_train_input_fn
else:
name = 'validation_data'
input_fn = eval_input_fn
# The first eval input will be evaluated.
input_fn = eval_input_fns[0]
if FLAGS.run_once:
estimator.evaluate(input_fn,
eval_steps,
num_eval_steps=None,
checkpoint_path=tf.train.latest_checkpoint(
FLAGS.checkpoint_dir))
else:
model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn,
eval_steps, train_steps, name)
model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
train_steps, name)
else:
train_spec, eval_specs = model_lib.create_train_and_eval_specs(
train_input_fn,
eval_input_fn,
eval_input_fns,
eval_on_train_input_fn,
predict_input_fn,
train_steps,
eval_steps,
eval_on_train_data=False)
# Currently only a single Eval Spec is allowed.
......
......@@ -62,15 +62,20 @@ flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
flags.DEFINE_string(
'hparams_overrides', None, 'Comma-separated list of '
'hyperparameters to override defaults.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
flags.DEFINE_integer('sample_1_of_n_eval_examples', 1, 'Will sample one of '
'every n eval input examples, where n is provided.')
flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
'one of every n train input examples for evaluation, '
'where n is provided. This is only used if '
'`eval_training_data` is True.')
flags.DEFINE_string(
'model_dir', None, 'Path to output model directory '
'where event and checkpoint files will be written.')
flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
'file.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_integer('num_eval_steps', None, 'Number of eval steps.')
FLAGS = tf.flags.FLAGS
......@@ -103,17 +108,18 @@ def main(unused_argv):
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps,
eval_steps=FLAGS.num_eval_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
sample_1_of_n_eval_on_train_examples=(
FLAGS.sample_1_of_n_eval_on_train_examples),
use_tpu_estimator=True,
use_tpu=FLAGS.use_tpu,
num_shards=FLAGS.num_shards,
**kwargs)
estimator = train_and_eval_dict['estimator']
train_input_fn = train_and_eval_dict['train_input_fn']
eval_input_fn = train_and_eval_dict['eval_input_fn']
eval_input_fns = train_and_eval_dict['eval_input_fns']
eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
if FLAGS.mode == 'train':
estimator.train(input_fn=train_input_fn, max_steps=train_steps)
......@@ -125,9 +131,10 @@ def main(unused_argv):
input_fn = eval_on_train_input_fn
else:
name = 'validation_data'
input_fn = eval_input_fn
model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn, eval_steps,
train_steps, name)
# Currently only a single eval input is allowed.
input_fn = eval_input_fns[0]
model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn, train_steps,
name)
if __name__ == '__main__':
......
......@@ -24,6 +24,7 @@ Feature map generators build on the base feature extractors and produce a list
of final feature maps.
"""
import collections
import functools
import tensorflow as tf
from object_detection.utils import ops
slim = tf.contrib.slim
......@@ -45,6 +46,222 @@ def get_depth_fn(depth_multiplier, min_depth):
return multiply_depth
class KerasMultiResolutionFeatureMaps(tf.keras.Model):
"""Generates multi resolution feature maps from input image features.
A Keras model that generates multi-scale feature maps for detection as in the
SSD papers by Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1.
More specifically, when called on inputs it performs the following two tasks:
1) If a layer name is provided in the configuration, returns that layer as a
feature map.
2) If a layer name is left as an empty string, constructs a new feature map
based on the spatial shape and depth configuration. Note that the current
implementation only supports generating new layers using convolution of
stride 2 resulting in a spatial resolution reduction by a factor of 2.
By default, the convolution kernel size is set to 3, and it can be
customized by the caller.
An example of the configuration for Inception V3:
{
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128]
}
When this feature generator object is called on input image_features:
Args:
image_features: A dictionary of handles to activation tensors from the
base feature extractor.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
"""
def __init__(self,
feature_map_layout,
depth_multiplier,
min_depth,
insert_1x1_conv,
is_training,
conv_hyperparams,
freeze_batchnorm,
name=None):
"""Constructor.
Args:
feature_map_layout: Dictionary of specifications for the feature map
layouts in the following format (Inception V2/V3 respectively):
{
'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128]
}
or
{
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128]
}
If 'from_layer' is specified, the specified feature map is directly used
as a box predictor layer, and the layer_depth is directly inferred from
the feature map (instead of using the provided 'layer_depth' parameter).
In this case, our convention is to set 'layer_depth' to -1 for clarity.
Otherwise, if 'from_layer' is an empty string, then the box predictor
layer will be built from the previous layer using convolution
operations. Note that the current implementation only supports
generating new layers using convolutions of stride 2 (resulting in a
spatial resolution reduction by a factor of 2), and will be extended to
a more flexible design. Convolution kernel size is set to 3 by default,
and can be customized by the 'conv_kernel_size' parameter (similarly,
'conv_kernel_size' should be set to -1 if 'from_layer' is specified).
The created convolution operation will be a normal 2D convolution by
default, and a depthwise convolution followed by 1x1 convolution if
'use_depthwise' is set to True.
depth_multiplier: Depth multiplier for convolutional layers.
min_depth: Minimum depth for convolutional layers.
insert_1x1_conv: A boolean indicating whether an additional 1x1
convolution should be inserted before shrinking the feature map.
is_training: Indicates whether the feature generator is in training mode.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops.
freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
super(KerasMultiResolutionFeatureMaps, self).__init__(name=name)
self.feature_map_layout = feature_map_layout
self.convolutions = []
depth_fn = get_depth_fn(depth_multiplier, min_depth)
base_from_layer = ''
use_explicit_padding = False
if 'use_explicit_padding' in feature_map_layout:
use_explicit_padding = feature_map_layout['use_explicit_padding']
use_depthwise = False
if 'use_depthwise' in feature_map_layout:
use_depthwise = feature_map_layout['use_depthwise']
for index, from_layer in enumerate(feature_map_layout['from_layer']):
net = []
self.convolutions.append(net)
layer_depth = feature_map_layout['layer_depth'][index]
conv_kernel_size = 3
if 'conv_kernel_size' in feature_map_layout:
conv_kernel_size = feature_map_layout['conv_kernel_size'][index]
if from_layer:
base_from_layer = from_layer
else:
if insert_1x1_conv:
layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
base_from_layer, index, depth_fn(layer_depth / 2))
net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth / 2),
[1, 1],
padding='SAME',
strides=1,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_batchnorm'))
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name))
layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
base_from_layer, index, conv_kernel_size, conv_kernel_size,
depth_fn(layer_depth))
stride = 2
padding = 'SAME'
if use_explicit_padding:
padding = 'VALID'
# We define this function here while capturing the value of
# conv_kernel_size, to avoid holding a reference to the loop variable
# conv_kernel_size inside of a lambda function
def fixed_padding(features, kernel_size=conv_kernel_size):
return ops.fixed_padding(features, kernel_size)
net.append(tf.keras.layers.Lambda(fixed_padding))
# TODO(rathodv): Add some utilities to simplify the creation of
# Depthwise & non-depthwise convolutions w/ normalization & activations
if use_depthwise:
net.append(tf.keras.layers.DepthwiseConv2D(
[conv_kernel_size, conv_kernel_size],
depth_multiplier=1,
padding=padding,
strides=stride,
name=layer_name + '_depthwise_conv',
**conv_hyperparams.params()))
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_depthwise_batchnorm'))
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name + '_depthwise'))
net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth), [1, 1],
padding='SAME',
strides=1,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_batchnorm'))
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name))
else:
net.append(tf.keras.layers.Conv2D(
depth_fn(layer_depth),
[conv_kernel_size, conv_kernel_size],
padding=padding,
strides=stride,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_batchnorm'))
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name))
def call(self, image_features):
"""Generate the multi-resolution feature maps.
Executed when calling the `.__call__` method on input.
Args:
image_features: A dictionary of handles to activation tensors from the
base feature extractor.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
"""
feature_maps = []
feature_map_keys = []
for index, from_layer in enumerate(self.feature_map_layout['from_layer']):
if from_layer:
feature_map = image_features[from_layer]
feature_map_keys.append(from_layer)
else:
feature_map = feature_maps[-1]
for layer in self.convolutions[index]:
feature_map = layer(feature_map)
layer_name = self.convolutions[index][-1].name
feature_map_keys.append(layer_name)
feature_maps.append(feature_map)
return collections.OrderedDict(
[(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
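# Illustrative usage sketch: building the Keras feature map generator with the
# Inception V2 layout from the docstring and calling it on a dictionary of
# backbone activations. `conv_hyperparams` is assumed to be a
# hyperparams_builder.KerasLayerHyperparams instance, and the input tensors
# are placeholders.
#
#   feature_map_layout = {
#       'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
#       'layer_depth': [-1, -1, -1, 512, 256, 128],
#   }
#   feature_map_generator = KerasMultiResolutionFeatureMaps(
#       feature_map_layout=feature_map_layout,
#       depth_multiplier=1.0,
#       min_depth=16,
#       insert_1x1_conv=True,
#       is_training=True,
#       conv_hyperparams=conv_hyperparams,
#       freeze_batchnorm=False)
#   feature_maps = feature_map_generator({
#       'Mixed_3c': mixed_3c, 'Mixed_4c': mixed_4c, 'Mixed_5c': mixed_5c})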
def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
min_depth, insert_1x1_conv, image_features):
"""Generates multi resolution feature maps from input image features.
......@@ -77,7 +294,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
}
or
{
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', '', ''],
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128]
}
If 'from_layer' is specified, the specified feature map is directly used
......@@ -179,7 +396,10 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
[(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
def fpn_top_down_feature_maps(image_features, depth, scope=None):
def fpn_top_down_feature_maps(image_features,
depth,
use_depthwise=False,
scope=None):
"""Generates `top-down` feature maps for Feature Pyramid Networks.
See https://arxiv.org/abs/1612.03144 for details.
......@@ -189,6 +409,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
Spatial resolutions of successive tensors must reduce exactly by a factor
of 2.
depth: depth of output feature maps.
use_depthwise: use depthwise separable conv instead of regular conv.
scope: A scope name to wrap this op under.
Returns:
......@@ -200,7 +421,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
output_feature_maps_list = []
output_feature_map_keys = []
with slim.arg_scope(
[slim.conv2d], padding='SAME', stride=1):
[slim.conv2d, slim.separable_conv2d], padding='SAME', stride=1):
top_down = slim.conv2d(
image_features[-1][1],
depth, [1, 1], activation_fn=None, normalizer_fn=None,
......@@ -216,7 +437,11 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
activation_fn=None, normalizer_fn=None,
scope='projection_%d' % (level + 1))
top_down += residual
output_feature_maps_list.append(slim.conv2d(
if use_depthwise:
conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1)
else:
conv_op = slim.conv2d
output_feature_maps_list.append(conv_op(
top_down,
depth, [3, 3],
scope='smoothing_%d' % (level + 1)))
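# Illustrative usage sketch (block names and shapes are assumptions, matching
# the FPN unit test further below): the input is an ordered list of
# (name, tensor) pairs from fine to coarse resolution.
#
#   image_features = [
#       ('block2', tf.random_uniform([4, 8, 8, 256])),
#       ('block3', tf.random_uniform([4, 4, 4, 256])),
#       ('block4', tf.random_uniform([4, 2, 2, 256])),
#   ]
#   fpn_features = fpn_top_down_feature_maps(
#       image_features, depth=128, use_depthwise=True)
#   # Returns an OrderedDict keyed 'top_down_block2' .. 'top_down_block4';
#   # every output has depth 128 and keeps its level's spatial size.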
......@@ -226,7 +451,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
image_features):
image_features, replace_pool_with_conv=False):
"""Generates pooling pyramid feature maps.
The pooling pyramid feature maps are motivated by
......@@ -250,6 +475,8 @@ def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
from the base feature.
image_features: A dictionary of handles to activation tensors from the
feature extractor.
replace_pool_with_conv: Whether or not to replace pooling operations with
convolutions in the PPN. Default is False.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
......@@ -279,12 +506,22 @@ def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
feature_map_keys.append(feature_map_key)
feature_maps.append(image_features)
feature_map = image_features
with slim.arg_scope([slim.max_pool2d], padding='SAME', stride=2):
for i in range(num_layers - 1):
feature_map_key = 'MaxPool2d_%d_2x2' % i
feature_map = slim.max_pool2d(
feature_map, [2, 2], padding='SAME', scope=feature_map_key)
feature_map_keys.append(feature_map_key)
feature_maps.append(feature_map)
if replace_pool_with_conv:
with slim.arg_scope([slim.conv2d], padding='SAME', stride=2):
for i in range(num_layers - 1):
feature_map_key = 'Conv2d_{}_3x3_s2_{}'.format(i,
base_feature_map_depth)
feature_map = slim.conv2d(
feature_map, base_feature_map_depth, [3, 3], scope=feature_map_key)
feature_map_keys.append(feature_map_key)
feature_maps.append(feature_map)
else:
with slim.arg_scope([slim.max_pool2d], padding='SAME', stride=2):
for i in range(num_layers - 1):
feature_map_key = 'MaxPool2d_%d_2x2' % i
feature_map = slim.max_pool2d(
feature_map, [2, 2], padding='SAME', scope=feature_map_key)
feature_map_keys.append(feature_map_key)
feature_maps.append(feature_map)
return collections.OrderedDict(
[(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
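# Illustrative usage sketch (shapes mirror the unit test further below): with
# a single 19x19 base feature map and num_layers=6, the pyramid halves the
# spatial size at each step.
#
#   image_features = {'image_features': tf.random_uniform([4, 19, 19, 1024])}
#   feature_maps = pooling_pyramid_feature_maps(
#       base_feature_map_depth=1024,
#       num_layers=6,
#       image_features=image_features,
#       replace_pool_with_conv=True)
#   # replace_pool_with_conv=True downsamples with stride-2 3x3 convs
#   # ('Conv2d_i_3x3_s2_1024'); False uses 2x2 max pools ('MaxPool2d_i_2x2').
#   # Spatial sizes go 19 -> 10 -> 5 -> 3 -> 2 -> 1.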
......@@ -15,9 +15,15 @@
"""Tests for feature map generators."""
from absl.testing import parameterized
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import feature_map_generators
from object_detection.protos import hyperparams_pb2
INCEPTION_V2_LAYOUT = {
'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
......@@ -40,21 +46,60 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
}
# TODO(rathodv): add tests with different anchor strides.
@parameterized.parameters(
{'use_keras': False},
{'use_keras': True},
)
class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes_with_inception_v2(self):
def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _build_feature_map_generator(self, feature_map_layout, use_keras):
if use_keras:
return feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=feature_map_layout,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
freeze_batchnorm=False,
is_training=True,
conv_hyperparams=self._build_conv_hyperparams(),
name='FeatureMaps'
)
else:
def feature_map_generator(image_features):
return feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
return feature_map_generator
def test_get_expected_feature_map_shapes_with_inception_v2(self, use_keras):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=INCEPTION_V2_LAYOUT,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
use_keras=use_keras
)
feature_maps = feature_map_generator(image_features)
expected_feature_map_shapes = {
'Mixed_3c': (4, 28, 28, 256),
......@@ -70,21 +115,53 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_get_expected_feature_map_shapes_use_explicit_padding(
self, use_keras):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
}
layout_copy = INCEPTION_V2_LAYOUT.copy()
layout_copy['use_explicit_padding'] = True
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=layout_copy,
use_keras=use_keras
)
feature_maps = feature_map_generator(image_features)
expected_feature_map_shapes = {
'Mixed_3c': (4, 28, 28, 256),
'Mixed_4c': (4, 14, 14, 576),
'Mixed_5c': (4, 7, 7, 1024),
'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_inception_v3(self):
def test_get_expected_feature_map_shapes_with_inception_v3(self, use_keras):
image_features = {
'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=INCEPTION_V3_LAYOUT,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
use_keras=use_keras
)
feature_maps = feature_map_generator(image_features)
expected_feature_map_shapes = {
'Mixed_5d': (4, 35, 35, 256),
......@@ -100,10 +177,10 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
self):
self, use_keras):
image_features = {
'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512],
dtype=tf.float32),
......@@ -111,12 +188,11 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
dtype=tf.float32),
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
use_keras=use_keras
)
feature_maps = feature_map_generator(image_features)
expected_feature_map_shapes = {
'Conv2d_11_pointwise': (4, 16, 16, 512),
......@@ -131,7 +207,62 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_variable_names_with_inception_v2(self, use_keras):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
}
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=INCEPTION_V2_LAYOUT,
use_keras=use_keras
)
feature_maps = feature_map_generator(image_features)
expected_slim_variables = set([
'Mixed_5c_1_Conv2d_3_1x1_256/weights',
'Mixed_5c_1_Conv2d_3_1x1_256/biases',
'Mixed_5c_2_Conv2d_3_3x3_s2_512/weights',
'Mixed_5c_2_Conv2d_3_3x3_s2_512/biases',
'Mixed_5c_1_Conv2d_4_1x1_128/weights',
'Mixed_5c_1_Conv2d_4_1x1_128/biases',
'Mixed_5c_2_Conv2d_4_3x3_s2_256/weights',
'Mixed_5c_2_Conv2d_4_3x3_s2_256/biases',
'Mixed_5c_1_Conv2d_5_1x1_128/weights',
'Mixed_5c_1_Conv2d_5_1x1_128/biases',
'Mixed_5c_2_Conv2d_5_3x3_s2_256/weights',
'Mixed_5c_2_Conv2d_5_3x3_s2_256/biases',
])
expected_keras_variables = set([
'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
])
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
sess.run(feature_maps)
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
if use_keras:
self.assertSetEqual(expected_keras_variables, actual_variable_set)
else:
self.assertSetEqual(expected_slim_variables, actual_variable_set)
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
class FPNFeatureMapGeneratorTest(tf.test.TestCase):
......@@ -161,6 +292,31 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase):
for key, value in out_feature_maps.items()}
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_depthwise(self):
image_features = [
('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
]
feature_maps = feature_map_generators.fpn_top_down_feature_maps(
image_features=image_features, depth=128, use_depthwise=True)
expected_feature_map_shapes = {
'top_down_block2': (4, 8, 8, 128),
'top_down_block3': (4, 4, 4, 128),
'top_down_block4': (4, 2, 2, 128),
'top_down_block5': (4, 1, 1, 128)
}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = {key: value.shape
for key, value in out_feature_maps.items()}
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
class GetDepthFunctionTest(tf.test.TestCase):
......@@ -175,5 +331,94 @@ class GetDepthFunctionTest(tf.test.TestCase):
self.assertEqual(depth_fn(64), 32)
@parameterized.parameters(
{'replace_pool_with_conv': False},
{'replace_pool_with_conv': True},
)
class PoolingPyramidFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes(self, replace_pool_with_conv):
image_features = {
'image_features': tf.random_uniform([4, 19, 19, 1024])
}
feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
base_feature_map_depth=1024,
num_layers=6,
image_features=image_features,
replace_pool_with_conv=replace_pool_with_conv)
expected_pool_feature_map_shapes = {
'Base_Conv2d_1x1_1024': (4, 19, 19, 1024),
'MaxPool2d_0_2x2': (4, 10, 10, 1024),
'MaxPool2d_1_2x2': (4, 5, 5, 1024),
'MaxPool2d_2_2x2': (4, 3, 3, 1024),
'MaxPool2d_3_2x2': (4, 2, 2, 1024),
'MaxPool2d_4_2x2': (4, 1, 1, 1024),
}
expected_conv_feature_map_shapes = {
'Base_Conv2d_1x1_1024': (4, 19, 19, 1024),
'Conv2d_0_3x3_s2_1024': (4, 10, 10, 1024),
'Conv2d_1_3x3_s2_1024': (4, 5, 5, 1024),
'Conv2d_2_3x3_s2_1024': (4, 3, 3, 1024),
'Conv2d_3_3x3_s2_1024': (4, 2, 2, 1024),
'Conv2d_4_3x3_s2_1024': (4, 1, 1, 1024),
}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = {key: value.shape
for key, value in out_feature_maps.items()}
if replace_pool_with_conv:
self.assertDictEqual(expected_conv_feature_map_shapes,
out_feature_map_shapes)
else:
self.assertDictEqual(expected_pool_feature_map_shapes,
out_feature_map_shapes)
def test_get_expected_variable_names(self, replace_pool_with_conv):
image_features = {
'image_features': tf.random_uniform([4, 19, 19, 1024])
}
feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
base_feature_map_depth=1024,
num_layers=6,
image_features=image_features,
replace_pool_with_conv=replace_pool_with_conv)
expected_pool_variables = set([
'Base_Conv2d_1x1_1024/weights',
'Base_Conv2d_1x1_1024/biases',
])
expected_conv_variables = set([
'Base_Conv2d_1x1_1024/weights',
'Base_Conv2d_1x1_1024/biases',
'Conv2d_0_3x3_s2_1024/weights',
'Conv2d_0_3x3_s2_1024/biases',
'Conv2d_1_3x3_s2_1024/weights',
'Conv2d_1_3x3_s2_1024/biases',
'Conv2d_2_3x3_s2_1024/weights',
'Conv2d_2_3x3_s2_1024/biases',
'Conv2d_3_3x3_s2_1024/weights',
'Conv2d_3_3x3_s2_1024/biases',
'Conv2d_4_3x3_s2_1024/weights',
'Conv2d_4_3x3_s2_1024/biases',
])
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
sess.run(feature_maps)
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
if replace_pool_with_conv:
self.assertSetEqual(expected_conv_variables, actual_variable_set)
else:
self.assertSetEqual(expected_pool_variables, actual_variable_set)
if __name__ == '__main__':
tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A wrapper around the MobileNet v2 models for Keras, for object detection."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from object_detection.core import freezable_batch_norm
from object_detection.utils import ops
# pylint: disable=invalid-name
# This method copied from the slim mobilenet base network code (same license)
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
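# Worked example of the rounding rule above (numbers are illustrative):
# _make_divisible(90, 8) rounds 90 to the nearest multiple of 8, giving 88;
# since 88 >= 0.9 * 90 it is returned as-is. _make_divisible(11.2, 8) would
# first round down to 8, but 8 < 0.9 * 11.2, so one extra divisor is added
# and 16 is returned.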
class _LayersOverride(object):
"""Alternative Keras layers interface for the Keras MobileNetV2."""
def __init__(self,
batchnorm_training,
default_batchnorm_momentum=0.999,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None):
"""Alternative tf.keras.layers interface, for use by the Keras MobileNetV2.
It is used by the Keras applications kwargs injection API to
modify the Mobilenet v2 Keras application with changes required by
the Object Detection API.
These injected interfaces make the following changes to the network:
- Applies the Object Detection hyperparameter configuration
- Supports FreezableBatchNorms
- Adds support for a min number of filters for each layer
- Makes the `alpha` parameter affect the final convolution block even if it
is less than 1.0
- Adds support for explicit padding of convolutions
Args:
batchnorm_training: Bool. Assigned to Batch norm layer `training` param
when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
batch norm layers will be constructed using this value as the momentum.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops. Optionally set to `None`
to use default mobilenet_v2 layer builders.
use_explicit_padding: If True, use 'valid' padding for convolutions,
but explicitly pre-pads inputs so that the output dimensions are the
same as if 'same' padding were used. Off by default.
alpha: The width multiplier referenced in the MobileNetV2 paper. It
modifies the number of filters in each convolutional layer.
min_depth: Minimum number of filters in the convolutional layers.
"""
self._alpha = alpha
self._batchnorm_training = batchnorm_training
self._default_batchnorm_momentum = default_batchnorm_momentum
self._conv_hyperparams = conv_hyperparams
self._use_explicit_padding = use_explicit_padding
self._min_depth = min_depth
def _FixedPaddingLayer(self, kernel_size):
return tf.keras.layers.Lambda(lambda x: ops.fixed_padding(x, kernel_size))
def Conv2D(self, filters, **kwargs):
"""Builds a Conv2D layer according to the current Object Detection config.
Overrides the Keras MobileNetV2 application's convolutions with ones that
follow the spec specified by the Object Detection hyperparameters.
Args:
filters: The number of filters to use for the convolution.
**kwargs: Keyword args specified by the Keras application for
constructing the convolution.
Returns:
A one-arg callable that will either directly apply a Keras Conv2D layer to
the input argument, or that will first pad the input then apply a Conv2D
layer.
"""
# Make sure 'alpha' is always applied to the last convolution block's size
# (This overrides the Keras application's functionality)
if kwargs.get('name') == 'Conv_1' and self._alpha < 1.0:
filters = _make_divisible(1280 * self._alpha, 8)
# Apply the minimum depth to the convolution layers
if (self._min_depth and (filters < self._min_depth)
and not kwargs.get('name').endswith('expand')):
filters = self._min_depth
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
if self._use_explicit_padding and kernel_size > 1:
kwargs['padding'] = 'valid'
def padded_conv(features):
padded_features = self._FixedPaddingLayer(kernel_size)(features)
return tf.keras.layers.Conv2D(filters, **kwargs)(padded_features)
return padded_conv
else:
return tf.keras.layers.Conv2D(filters, **kwargs)
def DepthwiseConv2D(self, **kwargs):
"""Builds a DepthwiseConv2D according to the Object Detection config.
Overrides the Keras MobileNetV2 application's convolutions with ones that
follow the spec specified by the Object Detection hyperparameters.
Args:
**kwargs: Keyword args specified by the Keras application for
constructing the convolution.
Returns:
A one-arg callable that will either directly apply a Keras DepthwiseConv2D
layer to the input argument, or that will first pad the input then apply
the depthwise convolution.
"""
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
if self._use_explicit_padding and kernel_size > 1:
kwargs['padding'] = 'valid'
def padded_depthwise_conv(features):
padded_features = self._FixedPaddingLayer(kernel_size)(features)
return tf.keras.layers.DepthwiseConv2D(**kwargs)(padded_features)
return padded_depthwise_conv
else:
return tf.keras.layers.DepthwiseConv2D(**kwargs)
def BatchNormalization(self, **kwargs):
"""Builds a normalization layer.
Overrides the Keras application batch norm with the norm specified by the
Object Detection configuration.
Args:
**kwargs: Only the name is used, all other params ignored.
Required for matching `layers.BatchNormalization` calls in the Keras
application.
Returns:
A normalization layer specified by the Object Detection hyperparameter
configurations.
"""
name = kwargs.get('name')
if self._conv_hyperparams:
return self._conv_hyperparams.build_batch_norm(
training=self._batchnorm_training,
name=name)
else:
return freezable_batch_norm.FreezableBatchNorm(
training=self._batchnorm_training,
epsilon=1e-3,
momentum=self._default_batchnorm_momentum,
name=name)
def Input(self, shape):
"""Builds an Input layer.
Overrides the Keras application Input layer with one that uses a
tf.placeholder_with_default instead of a tf.placeholder. This is necessary
to ensure the application works when run on a TPU.
Args:
shape: The shape for the input layer to use. (Does not include a dimension
for the batch size).
Returns:
An input layer for the specified shape that internally uses a
placeholder_with_default.
"""
default_size = 224
default_batch_size = 1
shape = list(shape)
default_shape = [default_size if dim is None else dim for dim in shape]
input_tensor = tf.constant(0.0, shape=[default_batch_size] + default_shape)
placeholder_with_default = tf.placeholder_with_default(
input=input_tensor, shape=[None] + shape)
return tf.keras.layers.Input(tensor=placeholder_with_default)
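# Minimal sketch of the placeholder_with_default idea above (the 224x224x3
# shape is just the default used by this wrapper): the tensor behaves like a
# placeholder when fed, but falls back to the constant default otherwise,
# which keeps shape inference well-defined on TPU.
#
#   default = tf.constant(0.0, shape=[1, 224, 224, 3])
#   images = tf.placeholder_with_default(default, shape=[None, 224, 224, 3])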
# pylint: disable=unused-argument
def ReLU(self, *args, **kwargs):
"""Builds an activation layer.
Overrides the Keras application ReLU with the activation specified by the
Object Detection configuration.
Args:
*args: Ignored, required to match the `tf.keras.ReLU` interface
**kwargs: Only the name is used,
required to match `tf.keras.ReLU` interface
Returns:
An activation layer specified by the Object Detection hyperparameter
configurations.
"""
name = kwargs.get('name')
if self._conv_hyperparams:
return self._conv_hyperparams.build_activation_layer(name=name)
else:
return tf.keras.layers.Lambda(tf.nn.relu6, name=name)
# pylint: enable=unused-argument
# pylint: disable=unused-argument
def ZeroPadding2D(self, **kwargs):
"""Replaces explicit padding in the Keras application with a no-op.
Args:
**kwargs: Ignored, required to match the Keras applications usage.
Returns:
A no-op identity lambda.
"""
return lambda x: x
# pylint: enable=unused-argument
# Forward all non-overridden methods to the keras layers
def __getattr__(self, item):
return getattr(tf.keras.layers, item)
def mobilenet_v2(batchnorm_training,
default_batchnorm_momentum=0.9997,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None,
**kwargs):
"""Instantiates the MobileNetV2 architecture, modified for object detection.
This wraps the MobileNetV2 TensorFlow Keras application, but uses the
Keras application's kwargs-based monkey-patching API to override the Keras
architecture with the following changes:
- Changes the default batchnorm momentum to 0.9997
- Applies the Object Detection hyperparameter configuration
- Supports FreezableBatchNorms
- Adds support for a min number of filters for each layer
- Makes the `alpha` parameter affect the final convolution block even if it
is less than 1.0
- Adds support for explicit padding of convolutions
- Makes the Input layer use a tf.placeholder_with_default instead of a
tf.placeholder, to work on TPUs.
Args:
batchnorm_training: Bool. Assigned to Batch norm layer `training` param
when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
batch norm layers will be constructed using this value as the momentum.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops. Optionally set to `None`
to use default mobilenet_v2 layer builders.
use_explicit_padding: If True, use 'valid' padding for convolutions,
but explicitly pre-pads inputs so that the output dimensions are the
same as if 'same' padding were used. Off by default.
alpha: The width multiplier referenced in the MobileNetV2 paper. It
modifies the number of filters in each convolutional layer.
min_depth: Minimum number of filters in the convolutional layers.
**kwargs: Keyword arguments forwarded directly to the
`tf.keras.applications.MobilenetV2` method that constructs the Keras
model.
Returns:
A Keras model instance.
"""
layers_override = _LayersOverride(
batchnorm_training,
default_batchnorm_momentum=default_batchnorm_momentum,
conv_hyperparams=conv_hyperparams,
use_explicit_padding=use_explicit_padding,
min_depth=min_depth,
alpha=alpha)
return tf.keras.applications.MobileNetV2(alpha=alpha,
layers=layers_override,
**kwargs)
# pylint: enable=invalid-name
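# Illustrative usage sketch (keyword values are assumptions; the layer name is
# taken from the test list below): build the detection-friendly MobileNetV2
# backbone and expose an intermediate activation as a Keras model output.
#
#   full_model = mobilenet_v2(
#       batchnorm_training=False,
#       conv_hyperparams=None,  # fall back to the default layer builders
#       alpha=1.0,
#       min_depth=32,
#       weights=None,
#       include_top=False)
#   feature_out = full_model.get_layer(name='block_13_expand_relu').output
#   feature_model = tf.keras.Model(inputs=full_model.inputs,
#                                  outputs=feature_out)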
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for mobilenet_v2."""
import itertools
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models.keras_applications import mobilenet_v2
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
_layers_to_check = [
'Conv1_relu',
'block_1_expand_relu', 'block_1_depthwise_relu', 'block_1_project_BN',
'block_2_expand_relu', 'block_2_depthwise_relu', 'block_2_project_BN',
'block_3_expand_relu', 'block_3_depthwise_relu', 'block_3_project_BN',
'block_4_expand_relu', 'block_4_depthwise_relu', 'block_4_project_BN',
'block_5_expand_relu', 'block_5_depthwise_relu', 'block_5_project_BN',
'block_6_expand_relu', 'block_6_depthwise_relu', 'block_6_project_BN',
'block_7_expand_relu', 'block_7_depthwise_relu', 'block_7_project_BN',
'block_8_expand_relu', 'block_8_depthwise_relu', 'block_8_project_BN',
'block_9_expand_relu', 'block_9_depthwise_relu', 'block_9_project_BN',
'block_10_expand_relu', 'block_10_depthwise_relu', 'block_10_project_BN',
'block_11_expand_relu', 'block_11_depthwise_relu', 'block_11_project_BN',
'block_12_expand_relu', 'block_12_depthwise_relu', 'block_12_project_BN',
'block_13_expand_relu', 'block_13_depthwise_relu', 'block_13_project_BN',
'block_14_expand_relu', 'block_14_depthwise_relu', 'block_14_project_BN',
'block_15_expand_relu', 'block_15_depthwise_relu', 'block_15_project_BN',
'block_16_expand_relu', 'block_16_depthwise_relu', 'block_16_project_BN',
'out_relu']
class MobilenetV2Test(test_case.TestCase):
def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
batch_norm {
train: true,
scale: false,
center: true,
decay: 0.2,
epsilon: 0.1,
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _create_application_with_layer_outputs(
self, layer_names, batchnorm_training,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None):
"""Constructs Keras mobilenetv2 that extracts intermediate layer outputs."""
if not layer_names:
layer_names = _layers_to_check
full_model = mobilenet_v2.mobilenet_v2(
batchnorm_training=batchnorm_training,
conv_hyperparams=conv_hyperparams,
weights=None,
use_explicit_padding=use_explicit_padding,
alpha=alpha,
min_depth=min_depth,
include_top=False)
layer_outputs = [full_model.get_layer(name=layer).output
for layer in layer_names]
return tf.keras.Model(
inputs=full_model.inputs,
outputs=layer_outputs)
def _check_returns_correct_shape(
self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes, use_explicit_padding=False, min_depth=None,
layer_names=None):
def graph_fn(image_tensor):
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=use_explicit_padding,
min_depth=min_depth,
alpha=depth_multiplier)
return model(image_tensor)
image_tensor = np.random.rand(batch_size, image_height, image_width,
3).astype(np.float32)
feature_maps = self.execute(graph_fn, [image_tensor])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def _check_returns_correct_shapes_with_dynamic_inputs(
self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes, use_explicit_padding=False,
layer_names=None):
def graph_fn(image_height, image_width):
image_tensor = tf.random_uniform([batch_size, image_height, image_width,
3], dtype=tf.float32)
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=use_explicit_padding,
alpha=depth_multiplier)
return model(image_tensor)
feature_maps = self.execute_cpu(graph_fn, [
np.array(image_height, dtype=np.int32),
np.array(image_width, dtype=np.int32)
])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def _get_variables(self, depth_multiplier, layer_names=None):
g = tf.Graph()
with g.as_default():
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=False,
alpha=depth_multiplier)
model(preprocessed_inputs)
return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
def test_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_128_explicit_padding(
self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape, use_explicit_padding=True)
def test_returns_correct_shapes_with_dynamic_inputs(
self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 150, 150, 32),
(2, 150, 150, 96),
(2, 75, 75, 96),
(2, 75, 75, 24),
(2, 75, 75, 144),
(2, 75, 75, 144),
(2, 75, 75, 24),
(2, 75, 75, 144),
(2, 38, 38, 144),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 19, 19, 192),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 19, 19, 576),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 19, 19, 576),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 10, 10, 576),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 320),
(2, 10, 10, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_enforcing_min_depth(
self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
expected_feature_map_shape = [(2, 150, 150, 32),
(2, 150, 150, 192),
(2, 75, 75, 192),
(2, 75, 75, 32),
(2, 75, 75, 192),
(2, 75, 75, 192),
(2, 75, 75, 32),
(2, 75, 75, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 32)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape, min_depth=32)
def test_hyperparam_override(self):
hyperparams = self._build_conv_hyperparams()
model = mobilenet_v2.mobilenet_v2(
batchnorm_training=True,
conv_hyperparams=hyperparams,
weights=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=32,
include_top=False)
hyperparams.params()
bn_layer = model.get_layer(name='block_5_project_BN')
self.assertAllClose(bn_layer.momentum, 0.2)
self.assertAllClose(bn_layer.epsilon, 0.1)
def test_variable_count(self):
depth_multiplier = 1
variables = self._get_variables(depth_multiplier)
self.assertEqual(len(variables), 260)
if __name__ == '__main__':
tf.test.main()
......@@ -21,18 +21,40 @@ import itertools
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
class SsdFeatureExtractorTestBase(test_case.TestCase):
def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
batch_norm {
scale: false
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def conv_hyperparams_fn(self):
with tf.contrib.slim.arg_scope([]) as sc:
return sc
@abstractmethod
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
use_explicit_padding=False, use_keras=False):
"""Constructs a new feature extractor.
Args:
......@@ -42,20 +64,42 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
use_explicit_padding: use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_keras: if True, builds a Keras-based feature extractor; if False, builds
a slim-based one.
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
an ssd_meta_arch.SSDFeatureExtractor or an
ssd_meta_arch.SSDKerasFeatureExtractor object.
"""
pass
def check_extract_features_returns_correct_shape(
self, batch_size, image_height, image_width, depth_multiplier,
pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False):
def graph_fn(image_tensor):
def _extract_features(self, image_tensor, depth_multiplier, pad_to_multiple,
use_explicit_padding=False, use_keras=False):
try:
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple,
use_explicit_padding,
use_keras=use_keras)
# If the unit test does not support a use_keras arg, it raises an error:
except TypeError:
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple,
use_explicit_padding)
if use_keras:
feature_maps = feature_extractor(image_tensor)
else:
feature_maps = feature_extractor.extract_features(image_tensor)
return feature_maps
return feature_maps
def check_extract_features_returns_correct_shape(
self, batch_size, image_height, image_width, depth_multiplier,
pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False,
use_keras=False):
def graph_fn(image_tensor):
return self._extract_features(image_tensor,
depth_multiplier,
pad_to_multiple,
use_explicit_padding,
use_keras=use_keras)
image_tensor = np.random.rand(batch_size, image_height, image_width,
3).astype(np.float32)
......@@ -66,15 +110,16 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
def check_extract_features_returns_correct_shapes_with_dynamic_inputs(
self, batch_size, image_height, image_width, depth_multiplier,
pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False):
pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False,
use_keras=False):
def graph_fn(image_height, image_width):
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple,
use_explicit_padding)
image_tensor = tf.random_uniform([batch_size, image_height, image_width,
3], dtype=tf.float32)
feature_maps = feature_extractor.extract_features(image_tensor)
return feature_maps
return self._extract_features(image_tensor,
depth_multiplier,
pad_to_multiple,
use_explicit_padding,
use_keras=use_keras)
feature_maps = self.execute_cpu(graph_fn, [
np.array(image_height, dtype=np.int32),
......@@ -85,11 +130,13 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
self.assertAllEqual(feature_map.shape, expected_shape)
def check_extract_features_raises_error_with_invalid_image_size(
self, image_height, image_width, depth_multiplier, pad_to_multiple):
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
self, image_height, image_width, depth_multiplier, pad_to_multiple,
use_keras=False):
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
feature_maps = feature_extractor.extract_features(preprocessed_inputs)
feature_maps = self._extract_features(preprocessed_inputs,
depth_multiplier,
pad_to_multiple,
use_keras=use_keras)
test_preprocessed_image = np.random.rand(4, image_height, image_width, 3)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
......@@ -98,13 +145,19 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
feed_dict={preprocessed_inputs: test_preprocessed_image})
def check_feature_extractor_variables_under_scope(
self, depth_multiplier, pad_to_multiple, scope_name):
self, depth_multiplier, pad_to_multiple, scope_name, use_keras=False):
variables = self.get_feature_extractor_variables(
depth_multiplier, pad_to_multiple, use_keras)
for variable in variables:
self.assertTrue(variable.name.startswith(scope_name))
def get_feature_extractor_variables(
self, depth_multiplier, pad_to_multiple, use_keras=False):
g = tf.Graph()
with g.as_default():
feature_extractor = self._create_feature_extractor(
depth_multiplier, pad_to_multiple)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
feature_extractor.extract_features(preprocessed_inputs)
variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
for variable in variables:
self.assertTrue(variable.name.startswith(scope_name))
self._extract_features(preprocessed_inputs,
depth_multiplier,
pad_to_multiple,
use_keras=use_keras)
return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
......@@ -15,6 +15,8 @@
"""SSD MobilenetV1 FPN Feature Extractor."""
import copy
import functools
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
......@@ -27,6 +29,15 @@ from nets import mobilenet_v1
slim = tf.contrib.slim
# A modified config of mobilenet v1 that makes it more detection friendly.
def _create_modified_mobilenet_config():
conv_defs = copy.copy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
conv_defs[-2] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=512)
conv_defs[-1] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=256)
return conv_defs
_CONV_DEFS = _create_modified_mobilenet_config()
class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using MobilenetV1 FPN features."""
......@@ -38,6 +49,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
conv_hyperparams_fn,
fpn_min_level=3,
fpn_max_level=7,
additional_layer_depth=256,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
......@@ -63,6 +75,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of fpn
levels.
additional_layer_depth: additional feature map layer channel depth.
reuse_weights: whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
......@@ -84,6 +97,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
override_base_feature_extractor_hyperparams)
self._fpn_min_level = fpn_min_level
self._fpn_max_level = fpn_max_level
self._additional_layer_depth = additional_layer_depth
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -127,6 +141,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
conv_defs=_CONV_DEFS if self._use_depthwise else None,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
......@@ -143,7 +158,8 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_block_list.append(feature_blocks[level - 2])
fpn_features = feature_map_generators.fpn_top_down_feature_maps(
[(key, image_features[key]) for key in feature_block_list],
depth=depth_fn(256))
depth=depth_fn(self._additional_layer_depth),
use_depthwise=self._use_depthwise)
feature_maps = []
for level in range(self._fpn_min_level, base_fpn_max_level + 1):
feature_maps.append(fpn_features['top_down_{}'.format(
......@@ -152,9 +168,14 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_blocks[base_fpn_max_level - 2])]
# Construct coarse features
for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
last_feature_map = slim.conv2d(
if self._use_depthwise:
conv_op = functools.partial(
slim.separable_conv2d, depth_multiplier=1)
else:
conv_op = slim.conv2d
last_feature_map = conv_op(
last_feature_map,
num_outputs=depth_fn(256),
num_outputs=depth_fn(self._additional_layer_depth),
kernel_size=[3, 3],
stride=2,
padding='SAME',
......
......@@ -14,20 +14,27 @@
# ==============================================================================
"""Tests for ssd_mobilenet_v2_feature_extractor."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v2_feature_extractor
from object_detection.models import ssd_mobilenet_v2_keras_feature_extractor
slim = tf.contrib.slim
@parameterized.parameters(
{'use_keras': False},
{'use_keras': True},
)
class SsdMobilenetV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
use_explicit_padding=False, use_keras=False):
"""Constructs a new feature extractor.
Args:
......@@ -37,19 +44,47 @@ class SsdMobilenetV2FeatureExtractorTest(
use_explicit_padding: use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_keras: if True, builds a Keras-based feature extractor; if False, builds
a slim-based one.
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor(
False,
depth_multiplier,
min_depth,
pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self):
if use_keras:
return (ssd_mobilenet_v2_keras_feature_extractor.
SSDMobileNetV2KerasFeatureExtractor(
is_training=False,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=self._build_conv_hyperparams(),
freeze_batchnorm=False,
inplace_batchnorm_update=False,
use_explicit_padding=use_explicit_padding,
name='MobilenetV2'))
else:
return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor(
False,
depth_multiplier,
min_depth,
pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self, use_keras):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_keras=use_keras)
def test_extract_features_returns_correct_shapes_128_explicit_padding(
self, use_keras):
image_height = 128
image_width = 128
depth_multiplier = 1.0
......@@ -59,9 +94,11 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(
self, use_keras):
image_height = 128
image_width = 128
depth_multiplier = 1.0
......@@ -71,9 +108,9 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
expected_feature_map_shape, use_keras=use_keras)
def test_extract_features_returns_correct_shapes_299(self):
def test_extract_features_returns_correct_shapes_299(self, use_keras):
image_height = 299
image_width = 299
depth_multiplier = 1.0
......@@ -83,9 +120,10 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
expected_feature_map_shape, use_keras=use_keras)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
def test_extract_features_returns_correct_shapes_enforcing_min_depth(
self, use_keras):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
......@@ -95,9 +133,10 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
expected_feature_map_shape, use_keras=use_keras)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
self, use_keras):
image_height = 299
image_width = 299
depth_multiplier = 1.0
......@@ -107,35 +146,45 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
expected_feature_map_shape, use_keras=use_keras)
def test_extract_features_raises_error_with_invalid_image_size(self):
def test_extract_features_raises_error_with_invalid_image_size(
self, use_keras):
image_height = 32
image_width = 32
depth_multiplier = 1.0
pad_to_multiple = 1
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier, pad_to_multiple)
image_height, image_width, depth_multiplier, pad_to_multiple,
use_keras=use_keras)
def test_preprocess_returns_correct_value_range(self):
def test_preprocess_returns_correct_value_range(self, use_keras):
image_height = 128
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
pad_to_multiple,
use_keras=use_keras)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
def test_variables_only_created_in_scope(self):
def test_variables_only_created_in_scope(self, use_keras):
depth_multiplier = 1
pad_to_multiple = 1
scope_name = 'MobilenetV2'
self.check_feature_extractor_variables_under_scope(
depth_multiplier, pad_to_multiple, scope_name)
depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras)
def test_variable_count(self, use_keras):
depth_multiplier = 1
pad_to_multiple = 1
variables = self.get_feature_extractor_variables(
depth_multiplier, pad_to_multiple, use_keras=use_keras)
self.assertEqual(len(variables), 292)
def test_has_fused_batchnorm(self):
def test_has_fused_batchnorm(self, use_keras):
image_height = 40
image_width = 40
depth_multiplier = 1
......@@ -143,9 +192,13 @@ class SsdMobilenetV2FeatureExtractorTest(
image_placeholder = tf.placeholder(tf.float32,
[1, image_height, image_width, 3])
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
pad_to_multiple,
use_keras=use_keras)
preprocessed_image = feature_extractor.preprocess(image_placeholder)
_ = feature_extractor.extract_features(preprocessed_image)
if use_keras:
_ = feature_extractor(preprocessed_image)
else:
_ = feature_extractor.extract_features(preprocessed_image)
self.assertTrue(any(op.type == 'FusedBatchNorm'
for op in tf.get_default_graph().get_operations()))
......