Unverified commit 1f484095, authored by pkulzc, committed by GitHub

Minor fixes for object detection.

214018767  by Zhichao Lu:

    Add original_image_spatial_shape tensor in input dictionary to store shape of the original input image
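    A minimal sketch of the idea, assuming a simplified decoder (the field
    name matches this change; the helper and dict wiring are illustrative):

        # Sketch: record the pre-resize spatial shape of the decoded image so
        # it can be recovered after resizing/padding. The real decoder stores
        # this under fields.InputDataFields.original_image_spatial_shape.
        import tensorflow as tf

        def add_original_image_spatial_shape(tensor_dict):
          image = tensor_dict['image']  # 3-D uint8 tensor [height, width, 3]
          # tf.shape works even when the static shape is unknown
          # ([None, None, 3]).
          tensor_dict['original_image_spatial_shape'] = tf.shape(image)[:2]
          return tensor_dict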

--
213914693  by lzc:

    Internal change.

--
213872175  by Zhichao Lu:

    This CL adds a Keras-based mobilenet_v2 feature extractor for object detection models.

    As part of this CL, we use the Keras mobilenet_v2 application's keyword argument layer injection API to allow the generated network to support the object detection hyperparameters.
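    A rough sketch of the injection pattern (the literal hyperparameter
    values below are hypothetical stand-ins for what
    hyperparams_builder.KerasLayerHyperparams.params() returns; the real
    kwargs are built from the detection config):

        # Sketch: detection hyperparameters become constructor kwargs that
        # are splatted into each generated Keras layer.
        import tensorflow as tf

        conv_params = {
            'kernel_regularizer': tf.keras.regularizers.l2(0.00004),
            'kernel_initializer': tf.truncated_normal_initializer(stddev=0.03),
            'activation': None,  # activation is added as a separate layer
            'use_bias': False,   # redundant when batch norm centering follows
        }
        conv = tf.keras.layers.Conv2D(256, [3, 3], padding='SAME',
                                      **conv_params)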

--
213848499  by Zhichao Lu:

    Replace tf.image.resize_nearest_neighbor with tf.image.resize_images: tf.image.resize_nearest_neighbor only supports 4-D tensors, while the masks tensor is 3-D.
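    For reference, a small sketch of the replacement call (TF 1.x API; the
    mask shape below is illustrative):

        # tf.image.resize_images accepts 3-D ([h, w, c]) as well as 4-D
        # tensors, so it can resize a 3-D masks tensor where
        # tf.image.resize_nearest_neighbor (4-D only) raised an error.
        import tensorflow as tf

        masks = tf.zeros([300, 300, 5])  # e.g. stacked binary masks
        resized = tf.image.resize_images(
            masks, [150, 150],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
            align_corners=True)  # result shape: [150, 150, 5]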

--
213758622  by lzc:

    Internal change.

--

PiperOrigin-RevId: 214018767
parent 99256cf4
@@ -163,6 +163,10 @@ class KerasLayerHyperparams(object):
     new_params['activation'] = None
     if include_activation:
       new_params['activation'] = self._activation_fn
+    if self.use_batch_norm() and self.batch_norm_params()['center']:
+      new_params['use_bias'] = False
+    else:
+      new_params['use_bias'] = True
     new_params.update(**overrides)
     return new_params
......
@@ -50,11 +50,6 @@ from object_detection.models.ssd_mobilenet_v2_fpn_feature_extractor import SSDMo
 from object_detection.predictors import rfcn_box_predictor
 from object_detection.protos import model_pb2
 from object_detection.utils import ops
-# BEGIN GOOGLE-INTERNAL
-# TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
-# performance relative to a comparable Mask R-CNN model (b/112561592).
-from google3.image.understanding.object_detection.meta_architectures import ssd_mask_meta_arch
-# END GOOGLE-INTERNAL
 # A map of names to SSD feature extractors.
 SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
@@ -254,23 +249,6 @@ def _build_ssd_model(ssd_config, is_training, add_summaries,
         desired_negative_sampling_ratio)
   ssd_meta_arch_fn = ssd_meta_arch.SSDMetaArch
-  # BEGIN GOOGLE-INTERNAL
-  # TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
-  # performance relative to a comparable Mask R-CNN model (b/112561592).
-  predictor_config = ssd_config.box_predictor
-  predict_instance_masks = False
-  if predictor_config.WhichOneof(
-      'box_predictor_oneof') == 'convolutional_box_predictor':
-    predict_instance_masks = (
-        predictor_config.convolutional_box_predictor.HasField('mask_head'))
-  elif predictor_config.WhichOneof(
-      'box_predictor_oneof') == 'weight_shared_convolutional_box_predictor':
-    predict_instance_masks = (
-        predictor_config.weight_shared_convolutional_box_predictor.HasField(
-            'mask_head'))
-  if predict_instance_masks:
-    ssd_meta_arch_fn = ssd_mask_meta_arch.SSDMaskMetaArch
-  # END GOOGLE-INTERNAL
   return ssd_meta_arch_fn(
       is_training=is_training,
......
@@ -40,11 +40,6 @@ from object_detection.models.ssd_mobilenet_v1_ppn_feature_extractor import SSDMo
 from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor
 from object_detection.models.ssd_mobilenet_v2_fpn_feature_extractor import SSDMobileNetV2FpnFeatureExtractor
 from object_detection.protos import model_pb2
-# BEGIN GOOGLE-INTERNAL
-# TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
-# performance relative to a comparable Mask R-CNN model (b/112561592).
-from google3.image.understanding.object_detection.meta_architectures import ssd_mask_meta_arch
-# END GOOGLE-INTERNAL
 FRCNN_RESNET_FEAT_MAPS = {
     'faster_rcnn_resnet50':
@@ -169,161 +164,6 @@ class ModelBuilderTest(tf.test.TestCase, parameterized.TestCase):
         'desired_negative_sampling_ratio': 2
     })
-  # BEGIN GOOGLE-INTERNAL
-  # TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
-  # performance relative to a comparable Mask R-CNN model (b/112561592).
-  def test_create_ssd_conv_predictor_model_with_mask(self):
-    model_text_proto = """
-      ssd {
-        feature_extractor {
-          type: 'ssd_inception_v2'
-          conv_hyperparams {
-            regularizer {
-              l2_regularizer {
-              }
-            }
-            initializer {
-              truncated_normal_initializer {
-              }
-            }
-          }
-          override_base_feature_extractor_hyperparams: true
-        }
-        box_coder {
-          faster_rcnn_box_coder {
-          }
-        }
-        matcher {
-          argmax_matcher {
-          }
-        }
-        similarity_calculator {
-          iou_similarity {
-          }
-        }
-        anchor_generator {
-          ssd_anchor_generator {
-            aspect_ratios: 1.0
-          }
-        }
-        image_resizer {
-          fixed_shape_resizer {
-            height: 320
-            width: 320
-          }
-        }
-        box_predictor {
-          convolutional_box_predictor {
-            mask_head {
-            }
-            conv_hyperparams {
-              regularizer {
-                l2_regularizer {
-                }
-              }
-              initializer {
-                truncated_normal_initializer {
-                }
-              }
-            }
-          }
-        }
-        loss {
-          classification_loss {
-            weighted_softmax {
-            }
-          }
-          localization_loss {
-            weighted_smooth_l1 {
-            }
-          }
-        }
-        use_expected_classification_loss_under_sampling: true
-        minimum_negative_sampling: 10
-        desired_negative_sampling_ratio: 2
-      }"""
-    model_proto = model_pb2.DetectionModel()
-    text_format.Merge(model_text_proto, model_proto)
-    model = self.create_model(model_proto)
-    self.assertIsInstance(model, ssd_mask_meta_arch.SSDMaskMetaArch)
-  def test_create_ssd_weight_shared_predictor_model_with_mask(self):
-    model_text_proto = """
-      ssd {
-        feature_extractor {
-          type: 'ssd_inception_v2'
-          conv_hyperparams {
-            regularizer {
-              l2_regularizer {
-              }
-            }
-            initializer {
-              truncated_normal_initializer {
-              }
-            }
-          }
-          override_base_feature_extractor_hyperparams: true
-        }
-        box_coder {
-          faster_rcnn_box_coder {
-          }
-        }
-        matcher {
-          argmax_matcher {
-          }
-        }
-        similarity_calculator {
-          iou_similarity {
-          }
-        }
-        anchor_generator {
-          ssd_anchor_generator {
-            aspect_ratios: 1.0
-          }
-        }
-        image_resizer {
-          fixed_shape_resizer {
-            height: 320
-            width: 320
-          }
-        }
-        box_predictor {
-          weight_shared_convolutional_box_predictor {
-            mask_head {
-            }
-            depth: 32
-            conv_hyperparams {
-              regularizer {
-                l2_regularizer {
-                }
-              }
-              initializer {
-                random_normal_initializer {
-                }
-              }
-            }
-            num_layers_before_predictor: 1
-          }
-        }
-        loss {
-          classification_loss {
-            weighted_softmax {
-            }
-          }
-          localization_loss {
-            weighted_smooth_l1 {
-            }
-          }
-        }
-        use_expected_classification_loss_under_sampling: true
-        minimum_negative_sampling: 10
-        desired_negative_sampling_ratio: 2
-      }"""
-    model_proto = model_pb2.DetectionModel()
-    text_format.Merge(model_text_proto, model_proto)
-    model = self.create_model(model_proto)
-    self.assertIsInstance(model, ssd_mask_meta_arch.SSDMaskMetaArch)
-  # END GOOGLE-INTERNAL
   def test_create_ssd_inception_v3_model_from_config(self):
     model_text_proto = """
......
@@ -58,55 +58,6 @@ class MulticlassNonMaxSuppressionTest(test_case.TestCase):
     self.assertAllClose(nms_classes_output, exp_nms_classes)
   # TODO(bhattad): Remove conditional after CMLE moves to TF 1.9
-  # BEGIN GOOGLE-INTERNAL
-  def test_multiclass_nms_select_with_shared_boxes_pad_to_max_output_size(self):
-    boxes = np.array([[[0, 0, 1, 1]],
-                      [[0, 0.1, 1, 1.1]],
-                      [[0, -0.1, 1, 0.9]],
-                      [[0, 10, 1, 11]],
-                      [[0, 10.1, 1, 11.1]],
-                      [[0, 100, 1, 101]],
-                      [[0, 1000, 1, 1002]],
-                      [[0, 1000, 1, 1002.1]]], np.float32)
-    scores = np.array([[.9, 0.01], [.75, 0.05],
-                       [.6, 0.01], [.95, 0],
-                       [.5, 0.01], [.3, 0.01],
-                       [.01, .85], [.01, .5]], np.float32)
-    score_thresh = 0.1
-    iou_thresh = .5
-    max_size_per_class = 4
-    max_output_size = 5
-    exp_nms_corners = [[0, 10, 1, 11],
-                       [0, 0, 1, 1],
-                       [0, 1000, 1, 1002],
-                       [0, 100, 1, 101]]
-    exp_nms_scores = [.95, .9, .85, .3]
-    exp_nms_classes = [0, 0, 1, 0]
-    def graph_fn(boxes, scores):
-      nms, num_valid_nms_boxes = post_processing.multiclass_non_max_suppression(
-          boxes,
-          scores,
-          score_thresh,
-          iou_thresh,
-          max_size_per_class,
-          max_total_size=max_output_size,
-          pad_to_max_output_size=True)
-      return [nms.get(), nms.get_field(fields.BoxListFields.scores),
-              nms.get_field(fields.BoxListFields.classes), num_valid_nms_boxes]
-    [nms_corners_output, nms_scores_output, nms_classes_output,
-     num_valid_nms_boxes] = self.execute(graph_fn, [boxes, scores])
-    self.assertEqual(num_valid_nms_boxes, 4)
-    self.assertAllClose(nms_corners_output[0:num_valid_nms_boxes],
-                        exp_nms_corners)
-    self.assertAllClose(nms_scores_output[0:num_valid_nms_boxes],
-                        exp_nms_scores)
-    self.assertAllClose(nms_classes_output[0:num_valid_nms_boxes],
-                        exp_nms_classes)
-  # END GOOGLE-INTERNAL
   def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self):
     boxes = tf.constant([[[0, 0, 1, 1]],
@@ -1126,61 +1077,6 @@ class MulticlassNonMaxSuppressionTest(test_case.TestCase):
     self.assertAllClose(num_detections, [1, 1])
   # TODO(bhattad): Remove conditional after CMLE moves to TF 1.9
-  # BEGIN GOOGLE-INTERNAL
-  def test_batch_multiclass_nms_with_use_static_shapes(self):
-    boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
-                       [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
-                       [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
-                       [[0, 10, 1, 11], [0, 10, 1, 11]]],
-                      [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
-                       [[0, 100, 1, 101], [0, 100, 1, 101]],
-                       [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
-                       [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
-                     np.float32)
-    scores = np.array([[[.9, 0.01], [.75, 0.05],
-                        [.6, 0.01], [.95, 0]],
-                       [[.5, 0.01], [.3, 0.01],
-                        [.01, .85], [.01, .5]]],
-                      np.float32)
-    clip_window = np.array([[0., 0., 5., 5.],
-                            [0., 0., 200., 200.]],
-                           np.float32)
-    score_thresh = 0.1
-    iou_thresh = .5
-    max_output_size = 4
-    exp_nms_corners = np.array([[[0, 0, 1, 1],
-                                 [0, 0, 0, 0],
-                                 [0, 0, 0, 0],
-                                 [0, 0, 0, 0]],
-                                [[0, 10.1, 1, 11.1],
-                                 [0, 100, 1, 101],
-                                 [0, 0, 0, 0],
-                                 [0, 0, 0, 0]]])
-    exp_nms_scores = np.array([[.9, 0., 0., 0.],
-                               [.5, .3, 0, 0]])
-    exp_nms_classes = np.array([[0, 0, 0, 0],
-                                [0, 0, 0, 0]])
-    def graph_fn(boxes, scores, clip_window):
-      (nmsed_boxes, nmsed_scores, nmsed_classes, _, _, num_detections
-      ) = post_processing.batch_multiclass_non_max_suppression(
-          boxes, scores, score_thresh, iou_thresh,
-          max_size_per_class=max_output_size, clip_window=clip_window,
-          use_static_shapes=True)
-      return nmsed_boxes, nmsed_scores, nmsed_classes, num_detections
-    (nmsed_boxes, nmsed_scores, nmsed_classes,
-     num_detections) = self.execute(graph_fn, [boxes, scores, clip_window])
-    for i in range(len(num_detections)):
-      self.assertAllClose(nmsed_boxes[i, 0:num_detections[i]],
-                          exp_nms_corners[i, 0:num_detections[i]])
-      self.assertAllClose(nmsed_scores[i, 0:num_detections[i]],
-                          exp_nms_scores[i, 0:num_detections[i]])
-      self.assertAllClose(nmsed_classes[i, 0:num_detections[i]],
-                          exp_nms_classes[i, 0:num_detections[i]])
-    self.assertAllClose(num_detections, [1, 2])
-  # END GOOGLE-INTERNAL
 if __name__ == '__main__':
   tf.test.main()
@@ -811,8 +811,10 @@ def random_image_scale(image,
         image, [image_newysize, image_newxsize], align_corners=True)
     result.append(image)
     if masks is not None:
-      masks = tf.image.resize_nearest_neighbor(
-          masks, [image_newysize, image_newxsize], align_corners=True)
+      masks = tf.image.resize_images(
+          masks, [image_newysize, image_newxsize],
+          method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
+          align_corners=True)
       result.append(masks)
     return tuple(result)
......
@@ -36,6 +36,7 @@ class InputDataFields(object):
     image: image.
     image_additional_channels: additional channels.
     original_image: image in the original input size.
+    original_image_spatial_shape: spatial shape of the original image.
     key: unique key corresponding to image.
     source_id: source of the original image.
     filename: original filename of the dataset (without common path).
@@ -70,6 +71,7 @@ class InputDataFields(object):
   image = 'image'
   image_additional_channels = 'image_additional_channels'
   original_image = 'original_image'
+  original_image_spatial_shape = 'original_image_spatial_shape'
   key = 'key'
   source_id = 'source_id'
   filename = 'filename'
......
@@ -322,6 +322,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       A dictionary of the following tensors.
       fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
         containing image.
+      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
+        shape [2] containing shape of the image.
       fields.InputDataFields.source_id - string tensor containing original
         image id.
       fields.InputDataFields.key - string tensor with unique sha256 hash key.
@@ -365,6 +367,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       is_crowd = fields.InputDataFields.groundtruth_is_crowd
       tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
     tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
+    tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
+        tensor_dict[fields.InputDataFields.image])[:2]
     tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
         tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
......
@@ -97,13 +97,17 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-    self.assertAllEqual(
-        (tensor_dict[fields.InputDataFields.image].get_shape().as_list()),
-        [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
+                         get_shape().as_list()), [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.
+                         original_image_spatial_shape].
+                         get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
     self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
+    self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+                                            original_image_spatial_shape])
     self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
   def testDecodeImageKeyAndFilename(self):
@@ -141,13 +145,17 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-    self.assertAllEqual(
-        (tensor_dict[fields.InputDataFields.image].get_shape().as_list()),
-        [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
+                         get_shape().as_list()), [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.
+                         original_image_spatial_shape].
+                         get_shape().as_list()), [2])
     with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)
     self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
+    self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+                                            original_image_spatial_shape])
     self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
   def testDecodePngInstanceMasks(self):
......
@@ -103,7 +103,8 @@ def transform_input_data(tensor_dict,
   if retain_original_image:
     tensor_dict[fields.InputDataFields.original_image] = tf.cast(
-        tensor_dict[fields.InputDataFields.image], tf.uint8)
+        image_resizer_fn(tensor_dict[fields.InputDataFields.image], None)[0],
+        tf.uint8)
   # Apply data augmentation ops.
   if data_augmentation_fn is not None:
@@ -199,6 +200,7 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
       fields.InputDataFields.image: [
           height, width, 3 + num_additional_channels
       ],
+      fields.InputDataFields.original_image_spatial_shape: [2],
       fields.InputDataFields.image_additional_channels: [
           height, width, num_additional_channels
       ],
@@ -230,7 +232,7 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
   if fields.InputDataFields.original_image in tensor_dict:
     padding_shapes[fields.InputDataFields.original_image] = [
-        None, None, 3 + num_additional_channels
+        height, width, 3 + num_additional_channels
     ]
   if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
     tensor_shape = (
@@ -364,7 +366,9 @@ def _get_features_dict(input_dict):
           input_dict[fields.InputDataFields.image],
       HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
       fields.InputDataFields.true_image_shape:
-          input_dict[fields.InputDataFields.true_image_shape]
+          input_dict[fields.InputDataFields.true_image_shape],
+      fields.InputDataFields.original_image_spatial_shape:
+          input_dict[fields.InputDataFields.original_image_spatial_shape]
   }
   if fields.InputDataFields.original_image in input_dict:
     features[fields.InputDataFields.original_image] = input_dict[
@@ -479,8 +483,6 @@ def create_train_input_fn(train_config, train_input_config,
 def create_eval_input_fn(eval_config, eval_input_config, model_config):
   """Creates an eval `input` function for `Estimator`.
-  # TODO(ronnyvotel,rathodv): Allow batch sizes of more than 1 for eval.
   Args:
     eval_config: An eval_pb2.EvalConfig.
     eval_input_config: An input_reader_pb2.InputReader.
@@ -562,7 +564,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
     return (_get_features_dict(tensor_dict), _get_labels_dict(tensor_dict))
   dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
       eval_input_config,
-      batch_size=1,  # Currently only support batch size of 1 for eval.
+      batch_size=params['batch_size'] if params else eval_config.batch_size,
      transform_input_data_fn=transform_and_pad_input_data_fn)
   return dataset
......
@@ -20,6 +20,7 @@ from __future__ import print_function
 import functools
 import os
+from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf
@@ -28,6 +29,7 @@ from object_detection import inputs
 from object_detection.core import preprocessor
 from object_detection.core import standard_fields as fields
 from object_detection.utils import config_util
+from object_detection.utils import test_case
 FLAGS = tf.flags.FLAGS
@@ -64,7 +66,7 @@ def _make_initializable_iterator(dataset):
   return iterator
-class InputsTest(tf.test.TestCase):
+class InputsTest(test_case.TestCase, parameterized.TestCase):
   def test_faster_rcnn_resnet50_train_input(self):
     """Tests the training input function for FasterRcnnResnet50."""
@@ -103,52 +105,59 @@ class InputsTest(tf.test.TestCase):
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_weights].dtype)
-  def test_faster_rcnn_resnet50_eval_input(self):
+  @parameterized.parameters(
+      {'eval_batch_size': 1},
+      {'eval_batch_size': 8}
+  )
+  def test_faster_rcnn_resnet50_eval_input(self, eval_batch_size=1):
     """Tests the eval input function for FasterRcnnResnet50."""
     configs = _get_configs_for_model('faster_rcnn_resnet50_pets')
     model_config = configs['model']
     model_config.faster_rcnn.num_classes = 37
+    eval_config = configs['eval_config']
+    eval_config.batch_size = eval_batch_size
     eval_input_fn = inputs.create_eval_input_fn(
-        configs['eval_config'], configs['eval_input_configs'][0], model_config)
+        eval_config, configs['eval_input_configs'][0], model_config)
     features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
-    self.assertAllEqual([1, None, None, 3],
+    self.assertAllEqual([eval_batch_size, None, None, 3],
                         features[fields.InputDataFields.image].shape.as_list())
     self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
     self.assertAllEqual(
-        [1, None, None, 3],
+        [eval_batch_size, None, None, 3],
         features[fields.InputDataFields.original_image].shape.as_list())
     self.assertEqual(tf.uint8,
                      features[fields.InputDataFields.original_image].dtype)
-    self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list())
+    self.assertAllEqual([eval_batch_size],
+                        features[inputs.HASH_KEY].shape.as_list())
     self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
     self.assertAllEqual(
-        [1, 100, 4],
+        [eval_batch_size, 100, 4],
         labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_boxes].dtype)
     self.assertAllEqual(
-        [1, 100, model_config.faster_rcnn.num_classes],
+        [eval_batch_size, 100, model_config.faster_rcnn.num_classes],
         labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_classes].dtype)
     self.assertAllEqual(
-        [1, 100, model_config.faster_rcnn.num_classes],
+        [eval_batch_size, 100, model_config.faster_rcnn.num_classes],
         labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
     self.assertEqual(
         tf.float32,
         labels[fields.InputDataFields.groundtruth_confidences].dtype)
     self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
         labels[fields.InputDataFields.groundtruth_area].shape.as_list())
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_area].dtype)
     self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
         labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
     self.assertEqual(
         tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
     self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
         labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
     self.assertEqual(
         tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
@@ -197,53 +206,60 @@ class InputsTest(tf.test.TestCase):
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_weights].dtype)
-  def test_ssd_inceptionV2_eval_input(self):
+  @parameterized.parameters(
+      {'eval_batch_size': 1},
+      {'eval_batch_size': 8}
+  )
+  def test_ssd_inceptionV2_eval_input(self, eval_batch_size=1):
     """Tests the eval input function for SSDInceptionV2."""
     configs = _get_configs_for_model('ssd_inception_v2_pets')
     model_config = configs['model']
     model_config.ssd.num_classes = 37
+    eval_config = configs['eval_config']
+    eval_config.batch_size = eval_batch_size
     eval_input_fn = inputs.create_eval_input_fn(
-        configs['eval_config'], configs['eval_input_configs'][0], model_config)
+        eval_config, configs['eval_input_configs'][0], model_config)
     features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
-    self.assertAllEqual([1, 300, 300, 3],
+    self.assertAllEqual([eval_batch_size, 300, 300, 3],
                        features[fields.InputDataFields.image].shape.as_list())
     self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
     self.assertAllEqual(
-        [1, None, None, 3],
+        [eval_batch_size, 300, 300, 3],
         features[fields.InputDataFields.original_image].shape.as_list())
     self.assertEqual(tf.uint8,
                      features[fields.InputDataFields.original_image].dtype)
-    self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list())
+    self.assertAllEqual([eval_batch_size],
+                        features[inputs.HASH_KEY].shape.as_list())
     self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
     self.assertAllEqual(
-        [1, 100, 4],
+        [eval_batch_size, 100, 4],
         labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_boxes].dtype)
     self.assertAllEqual(
-        [1, 100, model_config.ssd.num_classes],
+        [eval_batch_size, 100, model_config.ssd.num_classes],
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_classes].dtype)
     self.assertAllEqual(
-        [1, 100, model_config.ssd.num_classes],
+        [eval_batch_size, 100, model_config.ssd.num_classes],
         labels[
             fields.InputDataFields.groundtruth_confidences].shape.as_list())
     self.assertEqual(
         tf.float32,
         labels[fields.InputDataFields.groundtruth_confidences].dtype)
     self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
         labels[fields.InputDataFields.groundtruth_area].shape.as_list())
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_area].dtype)
     self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
         labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
     self.assertEqual(
         tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
     self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
         labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
     self.assertEqual(
         tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
@@ -379,7 +395,7 @@ class InputsTest(tf.test.TestCase):
     self.assertEqual(out_string, '2798129067578209328')
-class DataAugmentationFnTest(tf.test.TestCase):
+class DataAugmentationFnTest(test_case.TestCase):
   def test_apply_image_and_box_augmentation(self):
     data_augmentation_options = [
@@ -529,7 +545,7 @@ def _fake_image_resizer_fn(image, mask):
   return (image, mask, tf.shape(image))
-class DataTransformationFnTest(tf.test.TestCase):
+class DataTransformationFnTest(test_case.TestCase):
   def test_combine_additional_channels_if_present(self):
     image = np.random.rand(4, 4, 3).astype(np.float32)
@@ -622,7 +638,9 @@ class DataTransformationFnTest(tf.test.TestCase):
         fields.InputDataFields.groundtruth_instance_masks:
             tf.constant(np.random.rand(2, 4, 4).astype(np.float32)),
         fields.InputDataFields.groundtruth_classes:
-            tf.constant(np.array([3, 1], np.int32))
+            tf.constant(np.array([3, 1], np.int32)),
+        fields.InputDataFields.original_image_spatial_shape:
+            tf.constant(np.array([4, 4], np.int32))
     }
     def fake_image_resizer_fn(image, masks=None):
@@ -649,7 +667,9 @@ class DataTransformationFnTest(tf.test.TestCase):
     self.assertAllEqual(transformed_inputs[
         fields.InputDataFields.original_image].dtype, tf.uint8)
     self.assertAllEqual(transformed_inputs[
-        fields.InputDataFields.original_image].shape, [4, 4, 3])
+        fields.InputDataFields.original_image_spatial_shape], [4, 4])
+    self.assertAllEqual(transformed_inputs[
+        fields.InputDataFields.original_image].shape, [8, 8, 3])
     self.assertAllEqual(transformed_inputs[
         fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])
@@ -741,7 +761,7 @@ class DataTransformationFnTest(tf.test.TestCase):
                         (np_image + 5) * 2)
-class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
+class PadInputDataToStaticShapesFnTest(test_case.TestCase):
   def test_pad_images_boxes_and_classes(self):
     input_tensor_dict = {
@@ -751,7 +771,10 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
             tf.placeholder(tf.float32, [None, 4]),
         fields.InputDataFields.groundtruth_classes:
             tf.placeholder(tf.int32, [None, 3]),
-        fields.InputDataFields.true_image_shape: tf.placeholder(tf.int32, [3]),
+        fields.InputDataFields.true_image_shape:
+            tf.placeholder(tf.int32, [3]),
+        fields.InputDataFields.original_image_spatial_shape:
+            tf.placeholder(tf.int32, [2])
     }
     padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
         tensor_dict=input_tensor_dict,
@@ -765,6 +788,9 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
     self.assertAllEqual(
         padded_tensor_dict[fields.InputDataFields.true_image_shape]
         .shape.as_list(), [3])
+    self.assertAllEqual(
+        padded_tensor_dict[fields.InputDataFields.original_image_spatial_shape]
+        .shape.as_list(), [2])
     self.assertAllEqual(
         padded_tensor_dict[fields.InputDataFields.groundtruth_boxes]
         .shape.as_list(), [3, 4])
......
@@ -504,13 +504,6 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
       for key in expected_shapes:
         self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
-  # BEGIN GOOGLE-INTERNAL
-  # TODO(bhattad): Remove conditional after CMLE moves to TF 1.11
-  @parameterized.parameters(
-      {'use_static_shapes': False},
-      {'use_static_shapes': True}
-  )
-  # END GOOGLE-INTERNAL
   def test_predict_gives_correct_shapes_in_train_mode_both_stages(
       self,
       use_static_shapes=False):
@@ -1187,16 +1180,6 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         'Loss/BoxClassifierLoss/classification_loss'], 0)
     self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0)
-  # BEGIN GOOGLE-INTERNAL
-  # TODO(bhattad): Remove conditional after CMLE moves to TF 1.11
-  @parameterized.parameters(
-      {'use_static_shapes': False, 'shared_boxes': False},
-      {'use_static_shapes': False, 'shared_boxes': True},
-      {'use_static_shapes': True, 'shared_boxes': False},
-      {'use_static_shapes': True, 'shared_boxes': True},
-  )
-  # END GOOGLE-INTERNAL
   def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(
       self, use_static_shapes=False, shared_boxes=False):
     batch_size = 2
......
@@ -125,12 +125,13 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
                depth_multiplier,
                min_depth,
                pad_to_multiple,
-               conv_hyperparams_config,
+               conv_hyperparams,
                freeze_batchnorm,
                inplace_batchnorm_update,
                use_explicit_padding=False,
                use_depthwise=False,
-               override_base_feature_extractor_hyperparams=False):
+               override_base_feature_extractor_hyperparams=False,
+               name=None):
    """Constructor.
    Args:
@@ -139,9 +140,9 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
       min_depth: minimum feature extractor depth.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      conv_hyperparams_config: A hyperparams.proto object containing
-        convolution hyperparameters for the layers added on top of the
-        base feature extractor.
+      conv_hyperparams: `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
       freeze_batchnorm: Whether to freeze batch norm parameters during
         training or not. When training with a small batch size (e.g. 1), it is
         desirable to freeze batch norm update and use pretrained batch norm
@@ -156,14 +157,16 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
       override_base_feature_extractor_hyperparams: Whether to override
         hyperparameters of the base feature extractor with the one from
         `conv_hyperparams_config`.
+      name: A string name scope to assign to the model. If 'None', Keras
+        will auto-generate one from the class name.
     """
-    super(SSDKerasFeatureExtractor, self).__init__()
+    super(SSDKerasFeatureExtractor, self).__init__(name=name)
     self._is_training = is_training
     self._depth_multiplier = depth_multiplier
     self._min_depth = min_depth
     self._pad_to_multiple = pad_to_multiple
-    self._conv_hyperparams_config = conv_hyperparams_config
+    self._conv_hyperparams = conv_hyperparams
     self._freeze_batchnorm = freeze_batchnorm
     self._inplace_batchnorm_update = inplace_batchnorm_update
     self._use_explicit_padding = use_explicit_padding
......
@@ -210,60 +210,6 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
     self.assertAllClose(detections_out['num_detections'],
                         expected_num_detections)
-  # BEGIN GOOGLE-INTERNAL
-  # TODO(b/112621326): Remove conditional after CMLE moves to TF 1.11
-  def test_postprocess_results_are_correct_static(self, use_keras):
-    with tf.Graph().as_default():
-      _, _, _, _ = self._create_model(use_keras=use_keras)
-    def graph_fn(input_image):
-      model, _, _, _ = self._create_model(use_static_shapes=True,
-                                          nms_max_size_per_class=4)
-      preprocessed_inputs, true_image_shapes = model.preprocess(input_image)
-      prediction_dict = model.predict(preprocessed_inputs,
-                                      true_image_shapes)
-      detections = model.postprocess(prediction_dict, true_image_shapes)
-      return (detections['detection_boxes'], detections['detection_scores'],
-              detections['detection_classes'], detections['num_detections'])
-    batch_size = 2
-    image_size = 2
-    channels = 3
-    input_image = np.random.rand(batch_size, image_size, image_size,
-                                 channels).astype(np.float32)
-    expected_boxes = [
-        [
-            [0, 0, .5, .5],
-            [0, .5, .5, 1],
-            [.5, 0, 1, .5],
-            [0, 0, 0, 0]
-        ],  # padding
-        [
-            [0, 0, .5, .5],
-            [0, .5, .5, 1],
-            [.5, 0, 1, .5],
-            [0, 0, 0, 0]
-        ]
-    ]  # padding
-    expected_scores = [[0, 0, 0, 0], [0, 0, 0, 0]]
-    expected_classes = [[0, 0, 0, 0], [0, 0, 0, 0]]
-    expected_num_detections = np.array([3, 3])
-    (detection_boxes, detection_scores, detection_classes,
-     num_detections) = self.execute(graph_fn, [input_image])
-    for image_idx in range(batch_size):
-      self.assertTrue(test_utils.first_rows_close_as_set(
-          detection_boxes[image_idx][
-              0:expected_num_detections[image_idx]].tolist(),
-          expected_boxes[image_idx][0:expected_num_detections[image_idx]]))
-      self.assertAllClose(
-          detection_scores[image_idx][0:expected_num_detections[image_idx]],
-          expected_scores[image_idx][0:expected_num_detections[image_idx]])
-      self.assertAllClose(
-          detection_classes[image_idx][0:expected_num_detections[image_idx]],
-          expected_classes[image_idx][0:expected_num_detections[image_idx]])
-    self.assertAllClose(num_detections,
-                        expected_num_detections)
-  # END GOOGLE-INTERNAL
   def test_loss_results_are_correct(self, use_keras):
......
@@ -67,7 +67,7 @@ class FakeSSDKerasFeatureExtractor(ssd_meta_arch.SSDKerasFeatureExtractor):
         depth_multiplier=0,
         min_depth=0,
         pad_to_multiple=1,
-        conv_hyperparams_config=None,
+        conv_hyperparams=None,
         freeze_batchnorm=False,
         inplace_batchnorm_update=False,
     )
......
@@ -377,9 +377,14 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
       groundtruth = _prepare_groundtruth_for_eval(detection_model,
                                                   class_agnostic)
       use_original_images = fields.InputDataFields.original_image in features
-      eval_images = (
-          features[fields.InputDataFields.original_image]
-          if use_original_images else features[fields.InputDataFields.image])
+      if use_original_images:
+        eval_images = tf.cast(tf.image.resize_bilinear(
+            features[fields.InputDataFields.original_image][0:1],
+            features[fields.InputDataFields.original_image_spatial_shape][0]),
+            tf.uint8)
+      else:
+        eval_images = features[fields.InputDataFields.image]
       eval_dict = eval_util.result_dict_for_single_example(
           eval_images[0:1],
           features[inputs.HASH_KEY][0],
@@ -520,8 +525,7 @@ def create_estimator_and_inputs(run_config,
     configs = get_configs_from_pipeline_file(pipeline_config_path)
     kwargs.update({
         'train_steps': train_steps,
-        'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples,
-        'retain_original_images_in_eval': False if use_tpu else True,
+        'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
     })
     if override_eval_num_epochs:
       kwargs.update({'eval_num_epochs': 1})
@@ -586,10 +590,6 @@ def create_estimator_and_inputs(run_config,
         use_tpu=use_tpu,
         config=run_config,
         # TODO(lzc): Remove conditional after CMLE moves to TF 1.9
-        # BEGIN GOOGLE-INTERNAL
-        export_to_tpu=export_to_tpu,
-        eval_on_tpu=False,  # Eval runs on CPU, so disable eval on TPU
-        # END GOOGLE-INTERNAL
         params=params if params else {})
   else:
     estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
......
@@ -145,7 +145,7 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
     if 'use_depthwise' in feature_map_layout:
       use_depthwise = feature_map_layout['use_depthwise']
     for index, from_layer in enumerate(feature_map_layout['from_layer']):
-      net = tf.keras.Sequential(name='output_%d' % index)
+      net = []
       self.convolutions.append(net)
       layer_depth = feature_map_layout['layer_depth'][index]
       conv_kernel_size = 3
@@ -157,17 +157,17 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
         if insert_1x1_conv:
          layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
              base_from_layer, index, depth_fn(layer_depth / 2))
-          net.add(tf.keras.layers.Conv2D(depth_fn(layer_depth / 2),
+          net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth / 2),
                                          [1, 1],
                                          padding='SAME',
                                          strides=1,
                                          name=layer_name + '_conv',
                                          **conv_hyperparams.params()))
-          net.add(
+          net.append(
              conv_hyperparams.build_batch_norm(
                  training=(is_training and not freeze_batchnorm),
                  name=layer_name + '_batchnorm'))
-          net.add(
+          net.append(
              conv_hyperparams.build_activation_layer(
                  name=layer_name))
@@ -182,51 +182,52 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
          # conv_kernel_size, to avoid holding a reference to the loop variable
          # conv_kernel_size inside of a lambda function
          def fixed_padding(features, kernel_size=conv_kernel_size):
-            ops.fixed_padding(features, kernel_size)
+            return ops.fixed_padding(features, kernel_size)
-          net.add(tf.keras.layers.Lambda(fixed_padding))
+          net.append(tf.keras.layers.Lambda(fixed_padding))
        # TODO(rathodv): Add some utilities to simplify the creation of
        # Depthwise & non-depthwise convolutions w/ normalization & activations
        if use_depthwise:
-          net.add(tf.keras.layers.DepthwiseConv2D(
+          net.append(tf.keras.layers.DepthwiseConv2D(
              [conv_kernel_size, conv_kernel_size],
              depth_multiplier=1,
              padding=padding,
              strides=stride,
              name=layer_name + '_depthwise_conv',
              **conv_hyperparams.params()))
-          net.add(
+          net.append(
              conv_hyperparams.build_batch_norm(
                  training=(is_training and not freeze_batchnorm),
                  name=layer_name + '_depthwise_batchnorm'))
-          net.add(
+          net.append(
              conv_hyperparams.build_activation_layer(
                  name=layer_name + '_depthwise'))
-          net.add(tf.keras.layers.Conv2D(depth_fn(layer_depth), [1, 1],
+          net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth), [1, 1],
                                         padding='SAME',
                                         strides=1,
                                         name=layer_name + '_conv',
                                         **conv_hyperparams.params()))
-          net.add(
+          net.append(
              conv_hyperparams.build_batch_norm(
                  training=(is_training and not freeze_batchnorm),
                  name=layer_name + '_batchnorm'))
-          net.add(
+          net.append(
              conv_hyperparams.build_activation_layer(
                  name=layer_name))
        else:
-          net.add(tf.keras.layers.Conv2D(depth_fn(layer_depth),
-                                         [conv_kernel_size, conv_kernel_size],
-                                         padding=padding,
-                                         strides=stride,
-                                         name=layer_name + '_conv',
-                                         **conv_hyperparams.params()))
+          net.append(tf.keras.layers.Conv2D(
+              depth_fn(layer_depth),
+              [conv_kernel_size, conv_kernel_size],
+              padding=padding,
+              strides=stride,
+              name=layer_name + '_conv',
+              **conv_hyperparams.params()))
-          net.add(
+          net.append(
              conv_hyperparams.build_batch_norm(
                  training=(is_training and not freeze_batchnorm),
                  name=layer_name + '_batchnorm'))
-          net.add(
+          net.append(
              conv_hyperparams.build_activation_layer(
                  name=layer_name))
@@ -252,8 +253,9 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
         feature_map_keys.append(from_layer)
       else:
         feature_map = feature_maps[-1]
-        feature_map = self.convolutions[index](feature_map)
-        layer_name = self.convolutions[index].layers[-1].name
+        for layer in self.convolutions[index]:
+          feature_map = layer(feature_map)
+        layer_name = self.convolutions[index][-1].name
         feature_map_keys.append(layer_name)
       feature_maps.append(feature_map)
     return collections.OrderedDict(
......
...@@ -118,8 +118,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): ...@@ -118,8 +118,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes) self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10 # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
# BEGIN GOOGLE-INTERNAL
def test_get_expected_feature_map_shapes_with_inception_v2_use_depthwise( def test_get_expected_feature_map_shapes_use_explicit_padding(
self, use_keras): self, use_keras):
image_features = { image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32), 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
...@@ -127,7 +127,7 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): ...@@ -127,7 +127,7 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32) 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
} }
layout_copy = INCEPTION_V2_LAYOUT.copy() layout_copy = INCEPTION_V2_LAYOUT.copy()
layout_copy['use_depthwise'] = True layout_copy['use_explicit_padding'] = True
feature_map_generator = self._build_feature_map_generator( feature_map_generator = self._build_feature_map_generator(
feature_map_layout=layout_copy, feature_map_layout=layout_copy,
use_keras=use_keras use_keras=use_keras
...@@ -149,7 +149,6 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): ...@@ -149,7 +149,6 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
out_feature_map_shapes = dict( out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items()) (key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes) self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
# END GOOGLE-INTERNAL
  def test_get_expected_feature_map_shapes_with_inception_v3(self, use_keras):
    image_features = {
@@ -238,18 +237,18 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
    ])
    expected_keras_variables = set([
-       'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
-       'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
-       'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
-       'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
-       'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
-       'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
-       'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
-       'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
-       'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
-       'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
-       'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
-       'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
+       'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
+       'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
+       'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
+       'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
+       'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
+       'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
+       'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
+       'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
+       'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
+       'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
+       'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
+       'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
    ])
    init_op = tf.global_variables_initializer()
@@ -264,82 +263,6 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
    self.assertSetEqual(expected_slim_variables, actual_variable_set)

  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
# BEGIN GOOGLE-INTERNAL
def test_get_expected_variable_names_with_inception_v2_use_depthwise(
self,
use_keras):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
}
layout_copy = INCEPTION_V2_LAYOUT.copy()
layout_copy['use_depthwise'] = True
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=layout_copy,
use_keras=use_keras
)
feature_maps = feature_map_generator(image_features)
expected_slim_variables = set([
'Mixed_5c_1_Conv2d_3_1x1_256/weights',
'Mixed_5c_1_Conv2d_3_1x1_256/biases',
'Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise/depthwise_weights',
'Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise/biases',
'Mixed_5c_2_Conv2d_3_3x3_s2_512/weights',
'Mixed_5c_2_Conv2d_3_3x3_s2_512/biases',
'Mixed_5c_1_Conv2d_4_1x1_128/weights',
'Mixed_5c_1_Conv2d_4_1x1_128/biases',
'Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise/depthwise_weights',
'Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise/biases',
'Mixed_5c_2_Conv2d_4_3x3_s2_256/weights',
'Mixed_5c_2_Conv2d_4_3x3_s2_256/biases',
'Mixed_5c_1_Conv2d_5_1x1_128/weights',
'Mixed_5c_1_Conv2d_5_1x1_128/biases',
'Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise/depthwise_weights',
'Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise/biases',
'Mixed_5c_2_Conv2d_5_3x3_s2_256/weights',
'Mixed_5c_2_Conv2d_5_3x3_s2_256/biases',
])
expected_keras_variables = set([
'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
('FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise_conv/'
'depthwise_kernel'),
('FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise_conv/'
'bias'),
'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
('FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise_conv/'
'depthwise_kernel'),
('FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise_conv/'
'bias'),
'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
('FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise_conv/'
'depthwise_kernel'),
('FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise_conv/'
'bias'),
'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
])
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
sess.run(feature_maps)
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
if use_keras:
self.assertSetEqual(expected_keras_variables, actual_variable_set)
else:
self.assertSetEqual(expected_slim_variables, actual_variable_set)
# END GOOGLE-INTERNAL
class FPNFeatureMapGeneratorTest(tf.test.TestCase):
...
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A wrapper around the MobileNet v2 models for Keras, for object detection."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from object_detection.core import freezable_batch_norm
from object_detection.utils import ops
# pylint: disable=invalid-name
# This method copied from the slim mobilenet base network code (same license)
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
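# A couple of worked examples of the rounding above (illustrative only):
#   _make_divisible(40, 32) -> 64  (rounding down to 32 would lose more than
#                                   10% of 40, so the value is bumped up by
#                                   one divisor)
#   _make_divisible(90, 8)  -> 88  (88 is within 10% of 90, so it is kept)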
class _LayersOverride(object):
"""Alternative Keras layers interface for the Keras MobileNetV2."""
def __init__(self,
batchnorm_training,
default_batchnorm_momentum=0.999,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None):
"""Alternative tf.keras.layers interface, for use by the Keras MobileNetV2.
It is used by the Keras applications kwargs injection API to
modify the MobileNetV2 Keras application with changes required by
the Object Detection API.
These injected interfaces make the following changes to the network:
- Applies the Object Detection hyperparameter configuration
- Supports FreezableBatchNorms
- Adds support for a min number of filters for each layer
- Makes the `alpha` parameter affect the final convolution block even if it
is less than 1.0
- Adds support for explicit padding of convolutions
Args:
batchnorm_training: Bool. Assigned to Batch norm layer `training` param
when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
batch norm layers will be constructed using this value as the momentum.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops. Optionally set to `None`
to use default mobilenet_v2 layer builders.
use_explicit_padding: If True, uses 'valid' padding for convolutions,
but explicitly pre-pads inputs so that the output dimensions are the
same as if 'same' padding were used. Off by default.
alpha: The width multiplier referenced in the MobileNetV2 paper. It
modifies the number of filters in each convolutional layer.
min_depth: Minimum number of filters in the convolutional layers.
"""
self._alpha = alpha
self._batchnorm_training = batchnorm_training
self._default_batchnorm_momentum = default_batchnorm_momentum
self._conv_hyperparams = conv_hyperparams
self._use_explicit_padding = use_explicit_padding
self._min_depth = min_depth
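# Sketch of how this override is consumed (see mobilenet_v2() below, which
# passes an instance of this class as the Keras application's `layers`
# keyword argument; the argument values here are illustrative):
#
#   layers_override = _LayersOverride(batchnorm_training=True)
#   model = tf.keras.applications.MobileNetV2(
#       alpha=1.0, layers=layers_override, weights=None, include_top=False)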
def _FixedPaddingLayer(self, kernel_size):
return tf.keras.layers.Lambda(lambda x: ops.fixed_padding(x, kernel_size))
def Conv2D(self, filters, **kwargs):
"""Builds a Conv2D layer according to the current Object Detection config.
Overrides the Keras MobileNetV2 application's convolutions with ones that
follow the spec specified by the Object Detection hyperparameters.
Args:
filters: The number of filters to use for the convolution.
**kwargs: Keyword args specified by the Keras application for
constructing the convolution.
Returns:
A one-arg callable that will either directly apply a Keras Conv2D layer to
the input argument, or that will first pad the input then apply a Conv2D
layer.
"""
# Make sure 'alpha' is always applied to the last convolution block's size
# (This overrides the Keras application's functionality)
if kwargs.get('name') == 'Conv_1' and self._alpha < 1.0:
filters = _make_divisible(1280 * self._alpha, 8)
# Apply the minimum depth to the convolution layers
if (self._min_depth and (filters < self._min_depth)
and not kwargs.get('name').endswith('expand')):
filters = self._min_depth
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
if self._use_explicit_padding and kernel_size > 1:
kwargs['padding'] = 'valid'
def padded_conv(features):
padded_features = self._FixedPaddingLayer(kernel_size)(features)
return tf.keras.layers.Conv2D(filters, **kwargs)(padded_features)
return padded_conv
else:
return tf.keras.layers.Conv2D(filters, **kwargs)
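# Illustrative note on the explicit-padding path above: with
# use_explicit_padding=True and kernel_size=3, ops.fixed_padding first pads
# the input by (kernel_size - 1) // 2 = 1 pixel on each spatial edge, and the
# subsequent 'valid' convolution then produces the same output dimensions as
# a 'same' convolution would, independent of the input size.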
def DepthwiseConv2D(self, **kwargs):
"""Builds a DepthwiseConv2D according to the Object Detection config.
Overrides the Keras MobileNetV2 application's convolutions with ones that
follow the spec specified by the Object Detection hyperparameters.
Args:
**kwargs: Keyword args specified by the Keras application for
constructing the convolution.
Returns:
A one-arg callable that will either directly apply a Keras DepthwiseConv2D
layer to the input argument, or that will first pad the input then apply
the depthwise convolution.
"""
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
if self._use_explicit_padding and kernel_size > 1:
kwargs['padding'] = 'valid'
def padded_depthwise_conv(features):
padded_features = self._FixedPaddingLayer(kernel_size)(features)
return tf.keras.layers.DepthwiseConv2D(**kwargs)(padded_features)
return padded_depthwise_conv
else:
return tf.keras.layers.DepthwiseConv2D(**kwargs)
def BatchNormalization(self, **kwargs):
"""Builds a normalization layer.
Overrides the Keras application batch norm with the norm specified by the
Object Detection configuration.
Args:
**kwargs: Only the name is used, all other params ignored.
Required for matching `layers.BatchNormalization` calls in the Keras
application.
Returns:
A normalization layer specified by the Object Detection hyperparameter
configurations.
"""
name = kwargs.get('name')
if self._conv_hyperparams:
return self._conv_hyperparams.build_batch_norm(
training=self._batchnorm_training,
name=name)
else:
return freezable_batch_norm.FreezableBatchNorm(
training=self._batchnorm_training,
epsilon=1e-3,
momentum=self._default_batchnorm_momentum,
name=name)
def Input(self, shape):
"""Builds an Input layer.
Overrides the Keras application Input layer with one that uses a
tf.placeholder_with_default instead of a tf.placeholder. This is necessary
to ensure the application works when run on a TPU.
Args:
shape: The shape for the input layer to use. (Does not include a dimension
for the batch size).
Returns:
An input layer for the specified shape that internally uses a
placeholder_with_default.
"""
default_size = 224
default_batch_size = 1
shape = list(shape)
default_shape = [default_size if dim is None else dim for dim in shape]
input_tensor = tf.constant(0.0, shape=[default_batch_size] + default_shape)
placeholder_with_default = tf.placeholder_with_default(
input=input_tensor, shape=[None] + shape)
return tf.keras.layers.Input(tensor=placeholder_with_default)
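# For example, Input(shape=[None, None, 3]) produces an input layer backed
# by a placeholder_with_default whose default value is a zero tensor of
# shape [1, 224, 224, 3], while still accepting inputs of shape
# [None, None, None, 3] at run time.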
# pylint: disable=unused-argument
def ReLU(self, *args, **kwargs):
"""Builds an activation layer.
Overrides the Keras application ReLU with the activation specified by the
Object Detection configuration.
Args:
*args: Ignored, required to match the `tf.keras.ReLU` interface.
**kwargs: Only the name is used; required to match the `tf.keras.ReLU`
interface.
Returns:
An activation layer specified by the Object Detection hyperparameter
configurations.
"""
name = kwargs.get('name')
if self._conv_hyperparams:
return self._conv_hyperparams.build_activation_layer(name=name)
else:
return tf.keras.layers.Lambda(tf.nn.relu6, name=name)
# pylint: enable=unused-argument
# pylint: disable=unused-argument
def ZeroPadding2D(self, **kwargs):
"""Replaces explicit padding in the Keras application with a no-op.
Args:
**kwargs: Ignored, required to match the Keras applications usage.
Returns:
A no-op identity lambda.
"""
return lambda x: x
# pylint: enable=unused-argument
# Forward all non-overridden methods to the keras layers
def __getattr__(self, item):
return getattr(tf.keras.layers, item)
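# For example, accessing layers_override.GlobalAveragePooling2D (or any
# other layer type not overridden above) falls through to
# tf.keras.layers.GlobalAveragePooling2D unchanged.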
def mobilenet_v2(batchnorm_training,
default_batchnorm_momentum=0.9997,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None,
**kwargs):
"""Instantiates the MobileNetV2 architecture, modified for object detection.
This wraps the MobileNetV2 TensorFlow Keras application, but uses the
Keras application's kwargs-based monkey-patching API to override the Keras
architecture with the following changes:
- Changes the default batchnorm momentum to 0.9997
- Applies the Object Detection hyperparameter configuration
- Supports FreezableBatchNorms
- Adds support for a min number of filters for each layer
- Makes the `alpha` parameter affect the final convolution block even if it
is less than 1.0
- Adds support for explicit padding of convolutions
- Makes the Input layer use a tf.placeholder_with_default instead of a
tf.placeholder, to work on TPUs.
Args:
batchnorm_training: Bool. Assigned to Batch norm layer `training` param
when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
batch norm layers will be constructed using this value as the momentum.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops. Optionally set to `None`
to use default mobilenet_v2 layer builders.
use_explicit_padding: If True, uses 'valid' padding for convolutions,
but explicitly pre-pads inputs so that the output dimensions are the
same as if 'same' padding were used. Off by default.
alpha: The width multiplier referenced in the MobileNetV2 paper. It
modifies the number of filters in each convolutional layer.
min_depth: Minimum number of filters in the convolutional layers.
**kwargs: Keyword arguments forwarded directly to the
`tf.keras.applications.MobileNetV2` function that constructs the Keras
model.
Returns:
A Keras model instance.
"""
layers_override = _LayersOverride(
batchnorm_training,
default_batchnorm_momentum=default_batchnorm_momentum,
conv_hyperparams=conv_hyperparams,
use_explicit_padding=use_explicit_padding,
min_depth=min_depth,
alpha=alpha)
return tf.keras.applications.MobileNetV2(alpha=alpha,
layers=layers_override,
**kwargs)
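# A minimal usage sketch (argument values are illustrative; weights=None and
# include_top=False are forwarded to the Keras application, as in the tests
# below):
#
#   model = mobilenet_v2(batchnorm_training=False, alpha=1.0, min_depth=16,
#                        weights=None, include_top=False)
#   features = model.get_layer(name='out_relu').output
#   extractor = tf.keras.Model(inputs=model.inputs, outputs=features)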
# pylint: enable=invalid-name
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for mobilenet_v2."""
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models.keras_applications import mobilenet_v2
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
_layers_to_check = [
'Conv1_relu',
'block_1_expand_relu', 'block_1_depthwise_relu', 'block_1_project_BN',
'block_2_expand_relu', 'block_2_depthwise_relu', 'block_2_project_BN',
'block_3_expand_relu', 'block_3_depthwise_relu', 'block_3_project_BN',
'block_4_expand_relu', 'block_4_depthwise_relu', 'block_4_project_BN',
'block_5_expand_relu', 'block_5_depthwise_relu', 'block_5_project_BN',
'block_6_expand_relu', 'block_6_depthwise_relu', 'block_6_project_BN',
'block_7_expand_relu', 'block_7_depthwise_relu', 'block_7_project_BN',
'block_8_expand_relu', 'block_8_depthwise_relu', 'block_8_project_BN',
'block_9_expand_relu', 'block_9_depthwise_relu', 'block_9_project_BN',
'block_10_expand_relu', 'block_10_depthwise_relu', 'block_10_project_BN',
'block_11_expand_relu', 'block_11_depthwise_relu', 'block_11_project_BN',
'block_12_expand_relu', 'block_12_depthwise_relu', 'block_12_project_BN',
'block_13_expand_relu', 'block_13_depthwise_relu', 'block_13_project_BN',
'block_14_expand_relu', 'block_14_depthwise_relu', 'block_14_project_BN',
'block_15_expand_relu', 'block_15_depthwise_relu', 'block_15_project_BN',
'block_16_expand_relu', 'block_16_depthwise_relu', 'block_16_project_BN',
'out_relu']
class MobilenetV2Test(test_case.TestCase):
def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
batch_norm {
train: true,
scale: false,
center: true,
decay: 0.2,
epsilon: 0.1,
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _create_application_with_layer_outputs(
self, layer_names, batchnorm_training,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None):
"""Constructs Keras mobilenetv2 that extracts intermediate layer outputs."""
if not layer_names:
layer_names = _layers_to_check
full_model = mobilenet_v2.mobilenet_v2(
batchnorm_training=batchnorm_training,
conv_hyperparams=conv_hyperparams,
weights=None,
use_explicit_padding=use_explicit_padding,
alpha=alpha,
min_depth=min_depth,
include_top=False)
layer_outputs = [full_model.get_layer(name=layer).output
for layer in layer_names]
return tf.keras.Model(
inputs=full_model.inputs,
outputs=layer_outputs)
def _check_returns_correct_shape(
self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes, use_explicit_padding=False, min_depth=None,
layer_names=None):
def graph_fn(image_tensor):
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=use_explicit_padding,
min_depth=min_depth,
alpha=depth_multiplier)
return model(image_tensor)
image_tensor = np.random.rand(batch_size, image_height, image_width,
3).astype(np.float32)
feature_maps = self.execute(graph_fn, [image_tensor])
for feature_map, expected_shape in zip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def _check_returns_correct_shapes_with_dynamic_inputs(
self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes, use_explicit_padding=False,
layer_names=None):
def graph_fn(image_height, image_width):
image_tensor = tf.random_uniform([batch_size, image_height, image_width,
3], dtype=tf.float32)
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=use_explicit_padding,
alpha=depth_multiplier)
return model(image_tensor)
feature_maps = self.execute_cpu(graph_fn, [
np.array(image_height, dtype=np.int32),
np.array(image_width, dtype=np.int32)
])
for feature_map, expected_shape in zip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def _get_variables(self, depth_multiplier, layer_names=None):
g = tf.Graph()
with g.as_default():
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=False,
alpha=depth_multiplier)
model(preprocessed_inputs)
return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
def test_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_128_explicit_padding(
self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape, use_explicit_padding=True)
def test_returns_correct_shapes_with_dynamic_inputs(
self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 150, 150, 32),
(2, 150, 150, 96),
(2, 75, 75, 96),
(2, 75, 75, 24),
(2, 75, 75, 144),
(2, 75, 75, 144),
(2, 75, 75, 24),
(2, 75, 75, 144),
(2, 38, 38, 144),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 19, 19, 192),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 19, 19, 576),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 19, 19, 576),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 10, 10, 576),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 320),
(2, 10, 10, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_enforcing_min_depth(
self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
expected_feature_map_shape = [(2, 150, 150, 32),
(2, 150, 150, 192),
(2, 75, 75, 192),
(2, 75, 75, 32),
(2, 75, 75, 192),
(2, 75, 75, 192),
(2, 75, 75, 32),
(2, 75, 75, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 32)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape, min_depth=32)
def test_hyperparam_override(self):
hyperparams = self._build_conv_hyperparams()
model = mobilenet_v2.mobilenet_v2(
batchnorm_training=True,
conv_hyperparams=hyperparams,
weights=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=32,
include_top=False)
hyperparams.params()
bn_layer = model.get_layer(name='block_5_project_BN')
self.assertAllClose(bn_layer.momentum, 0.2)
self.assertAllClose(bn_layer.epsilon, 0.1)
def test_variable_count(self):
depth_multiplier = 1
variables = self._get_variables(depth_multiplier)
self.assertEqual(len(variables), 260)
if __name__ == '__main__':
tf.test.main()