Unverified Commit 1f484095 authored by pkulzc's avatar pkulzc Committed by GitHub
Browse files

Minor fixes for object detection.

214018767  by Zhichao Lu:

    Add original_image_spatial_shape tensor in input dictionary to store shape of the original input image

--
213914693  by lzc:

    Internal change.

--
213872175  by Zhichao Lu:

    This CL adds a Keras-based mobilenet_v2 feature extractor for object detection models.

    As part of this CL, we use the Keras mobilenet_v2 application's keyword argument layer injection API to allow the generated network to support the object detection hyperparameters.

--
213848499  by Zhichao Lu:

    Replace tf.image.resize_nearest_neighbor with tf.image.resize_images. tf.image.resize_nearest_neighbor only supports 4-D tensors but masks is a 3-D tensor.

--
213758622  by lzc:

    Internal change.

--

PiperOrigin-RevId: 214018767
parent 99256cf4
......@@ -163,6 +163,10 @@ class KerasLayerHyperparams(object):
new_params['activation'] = None
if include_activation:
new_params['activation'] = self._activation_fn
if self.use_batch_norm() and self.batch_norm_params()['center']:
new_params['use_bias'] = False
else:
new_params['use_bias'] = True
new_params.update(**overrides)
return new_params
......
......@@ -50,11 +50,6 @@ from object_detection.models.ssd_mobilenet_v2_fpn_feature_extractor import SSDMo
from object_detection.predictors import rfcn_box_predictor
from object_detection.protos import model_pb2
from object_detection.utils import ops
# BEGIN GOOGLE-INTERNAL
# TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
# performance relative to a comparable Mask R-CNN model (b/112561592).
from google3.image.understanding.object_detection.meta_architectures import ssd_mask_meta_arch
# END GOOGLE-INTERNAL
# A map of names to SSD feature extractors.
SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
......@@ -254,23 +249,6 @@ def _build_ssd_model(ssd_config, is_training, add_summaries,
desired_negative_sampling_ratio)
ssd_meta_arch_fn = ssd_meta_arch.SSDMetaArch
# BEGIN GOOGLE-INTERNAL
# TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
# performance relative to a comparable Mask R-CNN model (b/112561592).
predictor_config = ssd_config.box_predictor
predict_instance_masks = False
if predictor_config.WhichOneof(
'box_predictor_oneof') == 'convolutional_box_predictor':
predict_instance_masks = (
predictor_config.convolutional_box_predictor.HasField('mask_head'))
elif predictor_config.WhichOneof(
'box_predictor_oneof') == 'weight_shared_convolutional_box_predictor':
predict_instance_masks = (
predictor_config.weight_shared_convolutional_box_predictor.HasField(
'mask_head'))
if predict_instance_masks:
ssd_meta_arch_fn = ssd_mask_meta_arch.SSDMaskMetaArch
# END GOOGLE-INTERNAL
return ssd_meta_arch_fn(
is_training=is_training,
......
......@@ -40,11 +40,6 @@ from object_detection.models.ssd_mobilenet_v1_ppn_feature_extractor import SSDMo
from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor
from object_detection.models.ssd_mobilenet_v2_fpn_feature_extractor import SSDMobileNetV2FpnFeatureExtractor
from object_detection.protos import model_pb2
# BEGIN GOOGLE-INTERNAL
# TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
# performance relative to a comparable Mask R-CNN model (b/112561592).
from google3.image.understanding.object_detection.meta_architectures import ssd_mask_meta_arch
# END GOOGLE-INTERNAL
FRCNN_RESNET_FEAT_MAPS = {
'faster_rcnn_resnet50':
......@@ -169,161 +164,6 @@ class ModelBuilderTest(tf.test.TestCase, parameterized.TestCase):
'desired_negative_sampling_ratio': 2
})
# BEGIN GOOGLE-INTERNAL
# TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
# performance relative to a comparable Mask R-CNN model (b/112561592).
def test_create_ssd_conv_predictor_model_with_mask(self):
model_text_proto = """
ssd {
feature_extractor {
type: 'ssd_inception_v2'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
override_base_feature_extractor_hyperparams: true
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
mask_head {
}
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
}
use_expected_classification_loss_under_sampling: true
minimum_negative_sampling: 10
desired_negative_sampling_ratio: 2
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = self.create_model(model_proto)
self.assertIsInstance(model, ssd_mask_meta_arch.SSDMaskMetaArch)
def test_create_ssd_weight_shared_predictor_model_with_mask(self):
model_text_proto = """
ssd {
feature_extractor {
type: 'ssd_inception_v2'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
override_base_feature_extractor_hyperparams: true
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
mask_head {
}
depth: 32
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
random_normal_initializer {
}
}
}
num_layers_before_predictor: 1
}
}
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
}
use_expected_classification_loss_under_sampling: true
minimum_negative_sampling: 10
desired_negative_sampling_ratio: 2
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = self.create_model(model_proto)
self.assertIsInstance(model, ssd_mask_meta_arch.SSDMaskMetaArch)
# END GOOGLE-INTERNAL
def test_create_ssd_inception_v3_model_from_config(self):
model_text_proto = """
......
......@@ -58,55 +58,6 @@ class MulticlassNonMaxSuppressionTest(test_case.TestCase):
self.assertAllClose(nms_classes_output, exp_nms_classes)
# TODO(bhattad): Remove conditional after CMLE moves to TF 1.9
# BEGIN GOOGLE-INTERNAL
def test_multiclass_nms_select_with_shared_boxes_pad_to_max_output_size(self):
boxes = np.array([[[0, 0, 1, 1]],
[[0, 0.1, 1, 1.1]],
[[0, -0.1, 1, 0.9]],
[[0, 10, 1, 11]],
[[0, 10.1, 1, 11.1]],
[[0, 100, 1, 101]],
[[0, 1000, 1, 1002]],
[[0, 1000, 1, 1002.1]]], np.float32)
scores = np.array([[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0],
[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]], np.float32)
score_thresh = 0.1
iou_thresh = .5
max_size_per_class = 4
max_output_size = 5
exp_nms_corners = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 1000, 1, 1002],
[0, 100, 1, 101]]
exp_nms_scores = [.95, .9, .85, .3]
exp_nms_classes = [0, 0, 1, 0]
def graph_fn(boxes, scores):
nms, num_valid_nms_boxes = post_processing.multiclass_non_max_suppression(
boxes,
scores,
score_thresh,
iou_thresh,
max_size_per_class,
max_total_size=max_output_size,
pad_to_max_output_size=True)
return [nms.get(), nms.get_field(fields.BoxListFields.scores),
nms.get_field(fields.BoxListFields.classes), num_valid_nms_boxes]
[nms_corners_output, nms_scores_output, nms_classes_output,
num_valid_nms_boxes] = self.execute(graph_fn, [boxes, scores])
self.assertEqual(num_valid_nms_boxes, 4)
self.assertAllClose(nms_corners_output[0:num_valid_nms_boxes],
exp_nms_corners)
self.assertAllClose(nms_scores_output[0:num_valid_nms_boxes],
exp_nms_scores)
self.assertAllClose(nms_classes_output[0:num_valid_nms_boxes],
exp_nms_classes)
# END GOOGLE-INTERNAL
def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self):
boxes = tf.constant([[[0, 0, 1, 1]],
......@@ -1126,61 +1077,6 @@ class MulticlassNonMaxSuppressionTest(test_case.TestCase):
self.assertAllClose(num_detections, [1, 1])
# TODO(bhattad): Remove conditional after CMLE moves to TF 1.9
# BEGIN GOOGLE-INTERNAL
def test_batch_multiclass_nms_with_use_static_shapes(self):
boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
[[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
[[0, 10, 1, 11], [0, 10, 1, 11]]],
[[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
[[0, 100, 1, 101], [0, 100, 1, 101]],
[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
[[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
np.float32)
scores = np.array([[[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0]],
[[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]]],
np.float32)
clip_window = np.array([[0., 0., 5., 5.],
[0., 0., 200., 200.]],
np.float32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = np.array([[[0, 0, 1, 1],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 10.1, 1, 11.1],
[0, 100, 1, 101],
[0, 0, 0, 0],
[0, 0, 0, 0]]])
exp_nms_scores = np.array([[.9, 0., 0., 0.],
[.5, .3, 0, 0]])
exp_nms_classes = np.array([[0, 0, 0, 0],
[0, 0, 0, 0]])
def graph_fn(boxes, scores, clip_window):
(nmsed_boxes, nmsed_scores, nmsed_classes, _, _, num_detections
) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, clip_window=clip_window,
use_static_shapes=True)
return nmsed_boxes, nmsed_scores, nmsed_classes, num_detections
(nmsed_boxes, nmsed_scores, nmsed_classes,
num_detections) = self.execute(graph_fn, [boxes, scores, clip_window])
for i in range(len(num_detections)):
self.assertAllClose(nmsed_boxes[i, 0:num_detections[i]],
exp_nms_corners[i, 0:num_detections[i]])
self.assertAllClose(nmsed_scores[i, 0:num_detections[i]],
exp_nms_scores[i, 0:num_detections[i]])
self.assertAllClose(nmsed_classes[i, 0:num_detections[i]],
exp_nms_classes[i, 0:num_detections[i]])
self.assertAllClose(num_detections, [1, 2])
# END GOOGLE-INTERNAL
if __name__ == '__main__':
tf.test.main()
......@@ -811,8 +811,10 @@ def random_image_scale(image,
image, [image_newysize, image_newxsize], align_corners=True)
result.append(image)
if masks is not None:
masks = tf.image.resize_nearest_neighbor(
masks, [image_newysize, image_newxsize], align_corners=True)
masks = tf.image.resize_images(
masks, [image_newysize, image_newxsize],
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
align_corners=True)
result.append(masks)
return tuple(result)
......
......@@ -36,6 +36,7 @@ class InputDataFields(object):
image: image.
image_additional_channels: additional channels.
original_image: image in the original input size.
original_image_spatial_shape: image in the original input size.
key: unique key corresponding to image.
source_id: source of the original image.
filename: original filename of the dataset (without common path).
......@@ -70,6 +71,7 @@ class InputDataFields(object):
image = 'image'
image_additional_channels = 'image_additional_channels'
original_image = 'original_image'
original_image_spatial_shape = 'original_image_spatial_shape'
key = 'key'
source_id = 'source_id'
filename = 'filename'
......
......@@ -322,6 +322,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
A dictionary of the following tensors.
fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
containing image.
fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
shape [2] containing shape of the image.
fields.InputDataFields.source_id - string tensor containing original
image id.
fields.InputDataFields.key - string tensor with unique sha256 hash key.
......@@ -365,6 +367,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
is_crowd = fields.InputDataFields.groundtruth_is_crowd
tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
tensor_dict[fields.InputDataFields.image])[:2]
tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
......
......@@ -97,13 +97,17 @@ class TfExampleDecoderTest(tf.test.TestCase):
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual(
(tensor_dict[fields.InputDataFields.image].get_shape().as_list()),
[None, None, 3])
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
self.assertAllEqual((tensor_dict[fields.InputDataFields.
original_image_spatial_shape].
get_shape().as_list()), [2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
original_image_spatial_shape])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeImageKeyAndFilename(self):
......@@ -141,13 +145,17 @@ class TfExampleDecoderTest(tf.test.TestCase):
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual(
(tensor_dict[fields.InputDataFields.image].get_shape().as_list()),
[None, None, 3])
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
self.assertAllEqual((tensor_dict[fields.InputDataFields.
original_image_spatial_shape].
get_shape().as_list()), [2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
original_image_spatial_shape])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodePngInstanceMasks(self):
......
......@@ -103,7 +103,8 @@ def transform_input_data(tensor_dict,
if retain_original_image:
tensor_dict[fields.InputDataFields.original_image] = tf.cast(
tensor_dict[fields.InputDataFields.image], tf.uint8)
image_resizer_fn(tensor_dict[fields.InputDataFields.image], None)[0],
tf.uint8)
# Apply data augmentation ops.
if data_augmentation_fn is not None:
......@@ -199,6 +200,7 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
fields.InputDataFields.image: [
height, width, 3 + num_additional_channels
],
fields.InputDataFields.original_image_spatial_shape: [2],
fields.InputDataFields.image_additional_channels: [
height, width, num_additional_channels
],
......@@ -230,7 +232,7 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
if fields.InputDataFields.original_image in tensor_dict:
padding_shapes[fields.InputDataFields.original_image] = [
None, None, 3 + num_additional_channels
height, width, 3 + num_additional_channels
]
if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
tensor_shape = (
......@@ -364,7 +366,9 @@ def _get_features_dict(input_dict):
input_dict[fields.InputDataFields.image],
HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
fields.InputDataFields.true_image_shape:
input_dict[fields.InputDataFields.true_image_shape]
input_dict[fields.InputDataFields.true_image_shape],
fields.InputDataFields.original_image_spatial_shape:
input_dict[fields.InputDataFields.original_image_spatial_shape]
}
if fields.InputDataFields.original_image in input_dict:
features[fields.InputDataFields.original_image] = input_dict[
......@@ -479,8 +483,6 @@ def create_train_input_fn(train_config, train_input_config,
def create_eval_input_fn(eval_config, eval_input_config, model_config):
"""Creates an eval `input` function for `Estimator`.
# TODO(ronnyvotel,rathodv): Allow batch sizes of more than 1 for eval.
Args:
eval_config: An eval_pb2.EvalConfig.
eval_input_config: An input_reader_pb2.InputReader.
......@@ -562,7 +564,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
return (_get_features_dict(tensor_dict), _get_labels_dict(tensor_dict))
dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
eval_input_config,
batch_size=1, # Currently only support batch size of 1 for eval.
batch_size=params['batch_size'] if params else eval_config.batch_size,
transform_input_data_fn=transform_and_pad_input_data_fn)
return dataset
......
......@@ -20,6 +20,7 @@ from __future__ import print_function
import functools
import os
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
......@@ -28,6 +29,7 @@ from object_detection import inputs
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
from object_detection.utils import test_case
FLAGS = tf.flags.FLAGS
......@@ -64,7 +66,7 @@ def _make_initializable_iterator(dataset):
return iterator
class InputsTest(tf.test.TestCase):
class InputsTest(test_case.TestCase, parameterized.TestCase):
def test_faster_rcnn_resnet50_train_input(self):
"""Tests the training input function for FasterRcnnResnet50."""
......@@ -103,52 +105,59 @@ class InputsTest(tf.test.TestCase):
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_weights].dtype)
def test_faster_rcnn_resnet50_eval_input(self):
@parameterized.parameters(
{'eval_batch_size': 1},
{'eval_batch_size': 8}
)
def test_faster_rcnn_resnet50_eval_input(self, eval_batch_size=1):
"""Tests the eval input function for FasterRcnnResnet50."""
configs = _get_configs_for_model('faster_rcnn_resnet50_pets')
model_config = configs['model']
model_config.faster_rcnn.num_classes = 37
eval_config = configs['eval_config']
eval_config.batch_size = eval_batch_size
eval_input_fn = inputs.create_eval_input_fn(
configs['eval_config'], configs['eval_input_configs'][0], model_config)
eval_config, configs['eval_input_configs'][0], model_config)
features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
self.assertAllEqual([1, None, None, 3],
self.assertAllEqual([eval_batch_size, None, None, 3],
features[fields.InputDataFields.image].shape.as_list())
self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
self.assertAllEqual(
[1, None, None, 3],
[eval_batch_size, None, None, 3],
features[fields.InputDataFields.original_image].shape.as_list())
self.assertEqual(tf.uint8,
features[fields.InputDataFields.original_image].dtype)
self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list())
self.assertAllEqual([eval_batch_size],
features[inputs.HASH_KEY].shape.as_list())
self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
self.assertAllEqual(
[1, 100, 4],
[eval_batch_size, 100, 4],
labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_boxes].dtype)
self.assertAllEqual(
[1, 100, model_config.faster_rcnn.num_classes],
[eval_batch_size, 100, model_config.faster_rcnn.num_classes],
labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_classes].dtype)
self.assertAllEqual(
[1, 100, model_config.faster_rcnn.num_classes],
[eval_batch_size, 100, model_config.faster_rcnn.num_classes],
labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
self.assertEqual(
tf.float32,
labels[fields.InputDataFields.groundtruth_confidences].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_area].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_area].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
self.assertEqual(
tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
self.assertEqual(
tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
......@@ -197,53 +206,60 @@ class InputsTest(tf.test.TestCase):
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_weights].dtype)
def test_ssd_inceptionV2_eval_input(self):
@parameterized.parameters(
{'eval_batch_size': 1},
{'eval_batch_size': 8}
)
def test_ssd_inceptionV2_eval_input(self, eval_batch_size=1):
"""Tests the eval input function for SSDInceptionV2."""
configs = _get_configs_for_model('ssd_inception_v2_pets')
model_config = configs['model']
model_config.ssd.num_classes = 37
eval_config = configs['eval_config']
eval_config.batch_size = eval_batch_size
eval_input_fn = inputs.create_eval_input_fn(
configs['eval_config'], configs['eval_input_configs'][0], model_config)
eval_config, configs['eval_input_configs'][0], model_config)
features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
self.assertAllEqual([1, 300, 300, 3],
self.assertAllEqual([eval_batch_size, 300, 300, 3],
features[fields.InputDataFields.image].shape.as_list())
self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
self.assertAllEqual(
[1, None, None, 3],
[eval_batch_size, 300, 300, 3],
features[fields.InputDataFields.original_image].shape.as_list())
self.assertEqual(tf.uint8,
features[fields.InputDataFields.original_image].dtype)
self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list())
self.assertAllEqual([eval_batch_size],
features[inputs.HASH_KEY].shape.as_list())
self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
self.assertAllEqual(
[1, 100, 4],
[eval_batch_size, 100, 4],
labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_boxes].dtype)
self.assertAllEqual(
[1, 100, model_config.ssd.num_classes],
[eval_batch_size, 100, model_config.ssd.num_classes],
labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_classes].dtype)
self.assertAllEqual(
[1, 100, model_config.ssd.num_classes],
[eval_batch_size, 100, model_config.ssd.num_classes],
labels[
fields.InputDataFields.groundtruth_confidences].shape.as_list())
self.assertEqual(
tf.float32,
labels[fields.InputDataFields.groundtruth_confidences].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_area].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_area].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
self.assertEqual(
tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
self.assertEqual(
tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
......@@ -379,7 +395,7 @@ class InputsTest(tf.test.TestCase):
self.assertEqual(out_string, '2798129067578209328')
class DataAugmentationFnTest(tf.test.TestCase):
class DataAugmentationFnTest(test_case.TestCase):
def test_apply_image_and_box_augmentation(self):
data_augmentation_options = [
......@@ -529,7 +545,7 @@ def _fake_image_resizer_fn(image, mask):
return (image, mask, tf.shape(image))
class DataTransformationFnTest(tf.test.TestCase):
class DataTransformationFnTest(test_case.TestCase):
def test_combine_additional_channels_if_present(self):
image = np.random.rand(4, 4, 3).astype(np.float32)
......@@ -622,7 +638,9 @@ class DataTransformationFnTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_instance_masks:
tf.constant(np.random.rand(2, 4, 4).astype(np.float32)),
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([3, 1], np.int32))
tf.constant(np.array([3, 1], np.int32)),
fields.InputDataFields.original_image_spatial_shape:
tf.constant(np.array([4, 4], np.int32))
}
def fake_image_resizer_fn(image, masks=None):
......@@ -649,7 +667,9 @@ class DataTransformationFnTest(tf.test.TestCase):
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].dtype, tf.uint8)
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].shape, [4, 4, 3])
fields.InputDataFields.original_image_spatial_shape], [4, 4])
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].shape, [8, 8, 3])
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])
......@@ -741,7 +761,7 @@ class DataTransformationFnTest(tf.test.TestCase):
(np_image + 5) * 2)
class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
class PadInputDataToStaticShapesFnTest(test_case.TestCase):
def test_pad_images_boxes_and_classes(self):
input_tensor_dict = {
......@@ -751,7 +771,10 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
tf.placeholder(tf.float32, [None, 4]),
fields.InputDataFields.groundtruth_classes:
tf.placeholder(tf.int32, [None, 3]),
fields.InputDataFields.true_image_shape: tf.placeholder(tf.int32, [3]),
fields.InputDataFields.true_image_shape:
tf.placeholder(tf.int32, [3]),
fields.InputDataFields.original_image_spatial_shape:
tf.placeholder(tf.int32, [2])
}
padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
tensor_dict=input_tensor_dict,
......@@ -765,6 +788,9 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
self.assertAllEqual(
padded_tensor_dict[fields.InputDataFields.true_image_shape]
.shape.as_list(), [3])
self.assertAllEqual(
padded_tensor_dict[fields.InputDataFields.original_image_spatial_shape]
.shape.as_list(), [2])
self.assertAllEqual(
padded_tensor_dict[fields.InputDataFields.groundtruth_boxes]
.shape.as_list(), [3, 4])
......
......@@ -504,13 +504,6 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
for key in expected_shapes:
self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
# BEGIN GOOGLE-INTERNAL
# TODO(bhattad): Remove conditional after CMLE moves to TF 1.11
@parameterized.parameters(
{'use_static_shapes': False},
{'use_static_shapes': True}
)
# END GOOGLE-INTERNAL
def test_predict_gives_correct_shapes_in_train_mode_both_stages(
self,
use_static_shapes=False):
......@@ -1187,16 +1180,6 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
'Loss/BoxClassifierLoss/classification_loss'], 0)
self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0)
# BEGIN GOOGLE-INTERNAL
# TODO(bhattad): Remove conditional after CMLE moves to TF 1.11
@parameterized.parameters(
{'use_static_shapes': False, 'shared_boxes': False},
{'use_static_shapes': False, 'shared_boxes': True},
{'use_static_shapes': True, 'shared_boxes': False},
{'use_static_shapes': True, 'shared_boxes': True},
)
# END GOOGLE-INTERNAL
def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(
self, use_static_shapes=False, shared_boxes=False):
batch_size = 2
......
......@@ -125,12 +125,13 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams_config,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
use_explicit_padding=False,
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
override_base_feature_extractor_hyperparams=False,
name=None):
"""Constructor.
Args:
......@@ -139,9 +140,9 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_config: A hyperparams.proto object containing
convolution hyperparameters for the layers added on top of the
base feature extractor.
conv_hyperparams: `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
......@@ -156,14 +157,16 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_config`.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
super(SSDKerasFeatureExtractor, self).__init__()
super(SSDKerasFeatureExtractor, self).__init__(name=name)
self._is_training = is_training
self._depth_multiplier = depth_multiplier
self._min_depth = min_depth
self._pad_to_multiple = pad_to_multiple
self._conv_hyperparams_config = conv_hyperparams_config
self._conv_hyperparams = conv_hyperparams
self._freeze_batchnorm = freeze_batchnorm
self._inplace_batchnorm_update = inplace_batchnorm_update
self._use_explicit_padding = use_explicit_padding
......
......@@ -210,60 +210,6 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
self.assertAllClose(detections_out['num_detections'],
expected_num_detections)
# BEGIN GOOGLE-INTERNAL
# TODO(b/112621326): Remove conditional after CMLE moves to TF 1.11
def test_postprocess_results_are_correct_static(self, use_keras):
with tf.Graph().as_default():
_, _, _, _ = self._create_model(use_keras=use_keras)
def graph_fn(input_image):
model, _, _, _ = self._create_model(use_static_shapes=True,
nms_max_size_per_class=4)
preprocessed_inputs, true_image_shapes = model.preprocess(input_image)
prediction_dict = model.predict(preprocessed_inputs,
true_image_shapes)
detections = model.postprocess(prediction_dict, true_image_shapes)
return (detections['detection_boxes'], detections['detection_scores'],
detections['detection_classes'], detections['num_detections'])
batch_size = 2
image_size = 2
channels = 3
input_image = np.random.rand(batch_size, image_size, image_size,
channels).astype(np.float32)
expected_boxes = [
[
[0, 0, .5, .5],
[0, .5, .5, 1],
[.5, 0, 1, .5],
[0, 0, 0, 0]
], # padding
[
[0, 0, .5, .5],
[0, .5, .5, 1],
[.5, 0, 1, .5],
[0, 0, 0, 0]
]
] # padding
expected_scores = [[0, 0, 0, 0], [0, 0, 0, 0]]
expected_classes = [[0, 0, 0, 0], [0, 0, 0, 0]]
expected_num_detections = np.array([3, 3])
(detection_boxes, detection_scores, detection_classes,
num_detections) = self.execute(graph_fn, [input_image])
for image_idx in range(batch_size):
self.assertTrue(test_utils.first_rows_close_as_set(
detection_boxes[image_idx][
0:expected_num_detections[image_idx]].tolist(),
expected_boxes[image_idx][0:expected_num_detections[image_idx]]))
self.assertAllClose(
detection_scores[image_idx][0:expected_num_detections[image_idx]],
expected_scores[image_idx][0:expected_num_detections[image_idx]])
self.assertAllClose(
detection_classes[image_idx][0:expected_num_detections[image_idx]],
expected_classes[image_idx][0:expected_num_detections[image_idx]])
self.assertAllClose(num_detections,
expected_num_detections)
# END GOOGLE-INTERNAL
def test_loss_results_are_correct(self, use_keras):
......
......@@ -67,7 +67,7 @@ class FakeSSDKerasFeatureExtractor(ssd_meta_arch.SSDKerasFeatureExtractor):
depth_multiplier=0,
min_depth=0,
pad_to_multiple=1,
conv_hyperparams_config=None,
conv_hyperparams=None,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
)
......
......@@ -377,9 +377,14 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
groundtruth = _prepare_groundtruth_for_eval(detection_model,
class_agnostic)
use_original_images = fields.InputDataFields.original_image in features
eval_images = (
features[fields.InputDataFields.original_image]
if use_original_images else features[fields.InputDataFields.image])
if use_original_images:
eval_images = tf.cast(tf.image.resize_bilinear(
features[fields.InputDataFields.original_image][0:1],
features[fields.InputDataFields.original_image_spatial_shape][0]),
tf.uint8)
else:
eval_images = features[fields.InputDataFields.image]
eval_dict = eval_util.result_dict_for_single_example(
eval_images[0:1],
features[inputs.HASH_KEY][0],
......@@ -520,8 +525,7 @@ def create_estimator_and_inputs(run_config,
configs = get_configs_from_pipeline_file(pipeline_config_path)
kwargs.update({
'train_steps': train_steps,
'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples,
'retain_original_images_in_eval': False if use_tpu else True,
'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
})
if override_eval_num_epochs:
kwargs.update({'eval_num_epochs': 1})
......@@ -586,10 +590,6 @@ def create_estimator_and_inputs(run_config,
use_tpu=use_tpu,
config=run_config,
# TODO(lzc): Remove conditional after CMLE moves to TF 1.9
# BEGIN GOOGLE-INTERNAL
export_to_tpu=export_to_tpu,
eval_on_tpu=False, # Eval runs on CPU, so disable eval on TPU
# END GOOGLE-INTERNAL
params=params if params else {})
else:
estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
......
......@@ -145,7 +145,7 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
if 'use_depthwise' in feature_map_layout:
use_depthwise = feature_map_layout['use_depthwise']
for index, from_layer in enumerate(feature_map_layout['from_layer']):
net = tf.keras.Sequential(name='output_%d' % index)
net = []
self.convolutions.append(net)
layer_depth = feature_map_layout['layer_depth'][index]
conv_kernel_size = 3
......@@ -157,17 +157,17 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
if insert_1x1_conv:
layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
base_from_layer, index, depth_fn(layer_depth / 2))
net.add(tf.keras.layers.Conv2D(depth_fn(layer_depth / 2),
[1, 1],
padding='SAME',
strides=1,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.add(
net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth / 2),
[1, 1],
padding='SAME',
strides=1,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_batchnorm'))
net.add(
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name))
......@@ -182,51 +182,52 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
# conv_kernel_size, to avoid holding a reference to the loop variable
# conv_kernel_size inside of a lambda function
def fixed_padding(features, kernel_size=conv_kernel_size):
ops.fixed_padding(features, kernel_size)
net.add(tf.keras.layers.Lambda(fixed_padding))
return ops.fixed_padding(features, kernel_size)
net.append(tf.keras.layers.Lambda(fixed_padding))
# TODO(rathodv): Add some utilities to simplify the creation of
# Depthwise & non-depthwise convolutions w/ normalization & activations
if use_depthwise:
net.add(tf.keras.layers.DepthwiseConv2D(
net.append(tf.keras.layers.DepthwiseConv2D(
[conv_kernel_size, conv_kernel_size],
depth_multiplier=1,
padding=padding,
strides=stride,
name=layer_name + '_depthwise_conv',
**conv_hyperparams.params()))
net.add(
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_depthwise_batchnorm'))
net.add(
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name + '_depthwise'))
net.add(tf.keras.layers.Conv2D(depth_fn(layer_depth), [1, 1],
padding='SAME',
strides=1,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.add(
net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth), [1, 1],
padding='SAME',
strides=1,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_batchnorm'))
net.add(
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name))
else:
net.add(tf.keras.layers.Conv2D(depth_fn(layer_depth),
[conv_kernel_size, conv_kernel_size],
padding=padding,
strides=stride,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.add(
net.append(tf.keras.layers.Conv2D(
depth_fn(layer_depth),
[conv_kernel_size, conv_kernel_size],
padding=padding,
strides=stride,
name=layer_name + '_conv',
**conv_hyperparams.params()))
net.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=layer_name + '_batchnorm'))
net.add(
net.append(
conv_hyperparams.build_activation_layer(
name=layer_name))
......@@ -252,8 +253,9 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
feature_map_keys.append(from_layer)
else:
feature_map = feature_maps[-1]
feature_map = self.convolutions[index](feature_map)
layer_name = self.convolutions[index].layers[-1].name
for layer in self.convolutions[index]:
feature_map = layer(feature_map)
layer_name = self.convolutions[index][-1].name
feature_map_keys.append(layer_name)
feature_maps.append(feature_map)
return collections.OrderedDict(
......
......@@ -118,8 +118,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
# BEGIN GOOGLE-INTERNAL
def test_get_expected_feature_map_shapes_with_inception_v2_use_depthwise(
def test_get_expected_feature_map_shapes_use_explicit_padding(
self, use_keras):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
......@@ -127,7 +127,7 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
}
layout_copy = INCEPTION_V2_LAYOUT.copy()
layout_copy['use_depthwise'] = True
layout_copy['use_explicit_padding'] = True
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=layout_copy,
use_keras=use_keras
......@@ -149,7 +149,6 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
# END GOOGLE-INTERNAL
def test_get_expected_feature_map_shapes_with_inception_v3(self, use_keras):
image_features = {
......@@ -238,18 +237,18 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
])
expected_keras_variables = set([
'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
])
init_op = tf.global_variables_initializer()
......@@ -264,82 +263,6 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
self.assertSetEqual(expected_slim_variables, actual_variable_set)
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
# BEGIN GOOGLE-INTERNAL
def test_get_expected_variable_names_with_inception_v2_use_depthwise(
self,
use_keras):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
}
layout_copy = INCEPTION_V2_LAYOUT.copy()
layout_copy['use_depthwise'] = True
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=layout_copy,
use_keras=use_keras
)
feature_maps = feature_map_generator(image_features)
expected_slim_variables = set([
'Mixed_5c_1_Conv2d_3_1x1_256/weights',
'Mixed_5c_1_Conv2d_3_1x1_256/biases',
'Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise/depthwise_weights',
'Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise/biases',
'Mixed_5c_2_Conv2d_3_3x3_s2_512/weights',
'Mixed_5c_2_Conv2d_3_3x3_s2_512/biases',
'Mixed_5c_1_Conv2d_4_1x1_128/weights',
'Mixed_5c_1_Conv2d_4_1x1_128/biases',
'Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise/depthwise_weights',
'Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise/biases',
'Mixed_5c_2_Conv2d_4_3x3_s2_256/weights',
'Mixed_5c_2_Conv2d_4_3x3_s2_256/biases',
'Mixed_5c_1_Conv2d_5_1x1_128/weights',
'Mixed_5c_1_Conv2d_5_1x1_128/biases',
'Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise/depthwise_weights',
'Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise/biases',
'Mixed_5c_2_Conv2d_5_3x3_s2_256/weights',
'Mixed_5c_2_Conv2d_5_3x3_s2_256/biases',
])
expected_keras_variables = set([
'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
('FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise_conv/'
'depthwise_kernel'),
('FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise_conv/'
'bias'),
'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
('FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise_conv/'
'depthwise_kernel'),
('FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise_conv/'
'bias'),
'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
('FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise_conv/'
'depthwise_kernel'),
('FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise_conv/'
'bias'),
'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
])
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
sess.run(feature_maps)
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
if use_keras:
self.assertSetEqual(expected_keras_variables, actual_variable_set)
else:
self.assertSetEqual(expected_slim_variables, actual_variable_set)
# END GOOGLE-INTERNAL
class FPNFeatureMapGeneratorTest(tf.test.TestCase):
......
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A wrapper around the MobileNet v2 models for Keras, for object detection."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from object_detection.core import freezable_batch_norm
from object_detection.utils import ops
# pylint: disable=invalid-name
# This method copied from the slim mobilenet base network code (same license)
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
class _LayersOverride(object):
"""Alternative Keras layers interface for the Keras MobileNetV2."""
def __init__(self,
batchnorm_training,
default_batchnorm_momentum=0.999,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None):
"""Alternative tf.keras.layers interface, for use by the Keras MobileNetV2.
It is used by the Keras applications kwargs injection API to
modify the Mobilenet v2 Keras application with changes required by
the Object Detection API.
These injected interfaces make the following changes to the network:
- Applies the Object Detection hyperparameter configuration
- Supports FreezableBatchNorms
- Adds support for a min number of filters for each layer
- Makes the `alpha` parameter affect the final convolution block even if it
is less than 1.0
- Adds support for explicit padding of convolutions
Args:
batchnorm_training: Bool. Assigned to Batch norm layer `training` param
when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
batch norm layers will be constructed using this value as the momentum.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops. Optionally set to `None`
to use default mobilenet_v2 layer builders.
use_explicit_padding: If True, use 'valid' padding for convolutions,
but explicitly pre-pads inputs so that the output dimensions are the
same as if 'same' padding were used. Off by default.
alpha: The width multiplier referenced in the MobileNetV2 paper. It
modifies the number of filters in each convolutional layer.
min_depth: Minimum number of filters in the convolutional layers.
"""
self._alpha = alpha
self._batchnorm_training = batchnorm_training
self._default_batchnorm_momentum = default_batchnorm_momentum
self._conv_hyperparams = conv_hyperparams
self._use_explicit_padding = use_explicit_padding
self._min_depth = min_depth
def _FixedPaddingLayer(self, kernel_size):
return tf.keras.layers.Lambda(lambda x: ops.fixed_padding(x, kernel_size))
def Conv2D(self, filters, **kwargs):
"""Builds a Conv2D layer according to the current Object Detection config.
Overrides the Keras MobileNetV2 application's convolutions with ones that
follow the spec specified by the Object Detection hyperparameters.
Args:
filters: The number of filters to use for the convolution.
**kwargs: Keyword args specified by the Keras application for
constructing the convolution.
Returns:
A one-arg callable that will either directly apply a Keras Conv2D layer to
the input argument, or that will first pad the input then apply a Conv2D
layer.
"""
# Make sure 'alpha' is always applied to the last convolution block's size
# (This overrides the Keras application's functionality)
if kwargs.get('name') == 'Conv_1' and self._alpha < 1.0:
filters = _make_divisible(1280 * self._alpha, 8)
# Apply the minimum depth to the convolution layers
if (self._min_depth and (filters < self._min_depth)
and not kwargs.get('name').endswith('expand')):
filters = self._min_depth
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
if self._use_explicit_padding and kernel_size > 1:
kwargs['padding'] = 'valid'
def padded_conv(features):
padded_features = self._FixedPaddingLayer(kernel_size)(features)
return tf.keras.layers.Conv2D(filters, **kwargs)(padded_features)
return padded_conv
else:
return tf.keras.layers.Conv2D(filters, **kwargs)
def DepthwiseConv2D(self, **kwargs):
"""Builds a DepthwiseConv2D according to the Object Detection config.
Overrides the Keras MobileNetV2 application's convolutions with ones that
follow the spec specified by the Object Detection hyperparameters.
Args:
**kwargs: Keyword args specified by the Keras application for
constructing the convolution.
Returns:
A one-arg callable that will either directly apply a Keras DepthwiseConv2D
layer to the input argument, or that will first pad the input then apply
the depthwise convolution.
"""
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
if self._use_explicit_padding and kernel_size > 1:
kwargs['padding'] = 'valid'
def padded_depthwise_conv(features):
padded_features = self._FixedPaddingLayer(kernel_size)(features)
return tf.keras.layers.DepthwiseConv2D(**kwargs)(padded_features)
return padded_depthwise_conv
else:
return tf.keras.layers.DepthwiseConv2D(**kwargs)
def BatchNormalization(self, **kwargs):
"""Builds a normalization layer.
Overrides the Keras application batch norm with the norm specified by the
Object Detection configuration.
Args:
**kwargs: Only the name is used, all other params ignored.
Required for matching `layers.BatchNormalization` calls in the Keras
application.
Returns:
A normalization layer specified by the Object Detection hyperparameter
configurations.
"""
name = kwargs.get('name')
if self._conv_hyperparams:
return self._conv_hyperparams.build_batch_norm(
training=self._batchnorm_training,
name=name)
else:
return freezable_batch_norm.FreezableBatchNorm(
training=self._batchnorm_training,
epsilon=1e-3,
momentum=self._default_batchnorm_momentum,
name=name)
def Input(self, shape):
"""Builds an Input layer.
Overrides the Keras application Input layer with one that uses a
tf.placeholder_with_default instead of a tf.placeholder. This is necessary
to ensure the application works when run on a TPU.
Args:
shape: The shape for the input layer to use. (Does not include a dimension
for the batch size).
Returns:
An input layer for the specified shape that internally uses a
placeholder_with_default.
"""
default_size = 224
default_batch_size = 1
shape = list(shape)
default_shape = [default_size if dim is None else dim for dim in shape]
input_tensor = tf.constant(0.0, shape=[default_batch_size] + default_shape)
placeholder_with_default = tf.placeholder_with_default(
input=input_tensor, shape=[None] + shape)
return tf.keras.layers.Input(tensor=placeholder_with_default)
# pylint: disable=unused-argument
def ReLU(self, *args, **kwargs):
"""Builds an activation layer.
Overrides the Keras application ReLU with the activation specified by the
Object Detection configuration.
Args:
*args: Ignored, required to match the `tf.keras.ReLU` interface
**kwargs: Only the name is used,
required to match `tf.keras.ReLU` interface
Returns:
An activation layer specified by the Object Detection hyperparameter
configurations.
"""
name = kwargs.get('name')
if self._conv_hyperparams:
return self._conv_hyperparams.build_activation_layer(name=name)
else:
return tf.keras.layers.Lambda(tf.nn.relu6, name=name)
# pylint: enable=unused-argument
# pylint: disable=unused-argument
def ZeroPadding2D(self, **kwargs):
"""Replaces explicit padding in the Keras application with a no-op.
Args:
**kwargs: Ignored, required to match the Keras applications usage.
Returns:
A no-op identity lambda.
"""
return lambda x: x
# pylint: enable=unused-argument
# Forward all non-overridden methods to the keras layers
def __getattr__(self, item):
return getattr(tf.keras.layers, item)
def mobilenet_v2(batchnorm_training,
default_batchnorm_momentum=0.9997,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None,
**kwargs):
"""Instantiates the MobileNetV2 architecture, modified for object detection.
This wraps the MobileNetV2 tensorflow Keras application, but uses the
Keras application's kwargs-based monkey-patching API to override the Keras
architecture with the following changes:
- Changes the default batchnorm momentum to 0.9997
- Applies the Object Detection hyperparameter configuration
- Supports FreezableBatchNorms
- Adds support for a min number of filters for each layer
- Makes the `alpha` parameter affect the final convolution block even if it
is less than 1.0
- Adds support for explicit padding of convolutions
- Makes the Input layer use a tf.placeholder_with_default instead of a
tf.placeholder, to work on TPUs.
Args:
batchnorm_training: Bool. Assigned to Batch norm layer `training` param
when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
batch norm layers will be constructed using this value as the momentum.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops. Optionally set to `None`
to use default mobilenet_v2 layer builders.
use_explicit_padding: If True, use 'valid' padding for convolutions,
but explicitly pre-pads inputs so that the output dimensions are the
same as if 'same' padding were used. Off by default.
alpha: The width multiplier referenced in the MobileNetV2 paper. It
modifies the number of filters in each convolutional layer.
min_depth: Minimum number of filters in the convolutional layers.
**kwargs: Keyword arguments forwarded directly to the
`tf.keras.applications.MobilenetV2` method that constructs the Keras
model.
Returns:
A Keras model instance.
"""
layers_override = _LayersOverride(
batchnorm_training,
default_batchnorm_momentum=default_batchnorm_momentum,
conv_hyperparams=conv_hyperparams,
use_explicit_padding=use_explicit_padding,
min_depth=min_depth,
alpha=alpha)
return tf.keras.applications.MobileNetV2(alpha=alpha,
layers=layers_override,
**kwargs)
# pylint: enable=invalid-name
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for mobilenet_v2."""
import itertools
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models.keras_applications import mobilenet_v2
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
_layers_to_check = [
'Conv1_relu',
'block_1_expand_relu', 'block_1_depthwise_relu', 'block_1_project_BN',
'block_2_expand_relu', 'block_2_depthwise_relu', 'block_2_project_BN',
'block_3_expand_relu', 'block_3_depthwise_relu', 'block_3_project_BN',
'block_4_expand_relu', 'block_4_depthwise_relu', 'block_4_project_BN',
'block_5_expand_relu', 'block_5_depthwise_relu', 'block_5_project_BN',
'block_6_expand_relu', 'block_6_depthwise_relu', 'block_6_project_BN',
'block_7_expand_relu', 'block_7_depthwise_relu', 'block_7_project_BN',
'block_8_expand_relu', 'block_8_depthwise_relu', 'block_8_project_BN',
'block_9_expand_relu', 'block_9_depthwise_relu', 'block_9_project_BN',
'block_10_expand_relu', 'block_10_depthwise_relu', 'block_10_project_BN',
'block_11_expand_relu', 'block_11_depthwise_relu', 'block_11_project_BN',
'block_12_expand_relu', 'block_12_depthwise_relu', 'block_12_project_BN',
'block_13_expand_relu', 'block_13_depthwise_relu', 'block_13_project_BN',
'block_14_expand_relu', 'block_14_depthwise_relu', 'block_14_project_BN',
'block_15_expand_relu', 'block_15_depthwise_relu', 'block_15_project_BN',
'block_16_expand_relu', 'block_16_depthwise_relu', 'block_16_project_BN',
'out_relu']
class MobilenetV2Test(test_case.TestCase):
def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
batch_norm {
train: true,
scale: false,
center: true,
decay: 0.2,
epsilon: 0.1,
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _create_application_with_layer_outputs(
self, layer_names, batchnorm_training,
conv_hyperparams=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=None):
"""Constructs Keras mobilenetv2 that extracts intermediate layer outputs."""
if not layer_names:
layer_names = _layers_to_check
full_model = mobilenet_v2.mobilenet_v2(
batchnorm_training=batchnorm_training,
conv_hyperparams=conv_hyperparams,
weights=None,
use_explicit_padding=use_explicit_padding,
alpha=alpha,
min_depth=min_depth,
include_top=False)
layer_outputs = [full_model.get_layer(name=layer).output
for layer in layer_names]
return tf.keras.Model(
inputs=full_model.inputs,
outputs=layer_outputs)
def _check_returns_correct_shape(
self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes, use_explicit_padding=False, min_depth=None,
layer_names=None):
def graph_fn(image_tensor):
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=use_explicit_padding,
min_depth=min_depth,
alpha=depth_multiplier)
return model(image_tensor)
image_tensor = np.random.rand(batch_size, image_height, image_width,
3).astype(np.float32)
feature_maps = self.execute(graph_fn, [image_tensor])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def _check_returns_correct_shapes_with_dynamic_inputs(
self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes, use_explicit_padding=False,
layer_names=None):
def graph_fn(image_height, image_width):
image_tensor = tf.random_uniform([batch_size, image_height, image_width,
3], dtype=tf.float32)
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=use_explicit_padding,
alpha=depth_multiplier)
return model(image_tensor)
feature_maps = self.execute_cpu(graph_fn, [
np.array(image_height, dtype=np.int32),
np.array(image_width, dtype=np.int32)
])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def _get_variables(self, depth_multiplier, layer_names=None):
g = tf.Graph()
with g.as_default():
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False, use_explicit_padding=False,
alpha=depth_multiplier)
model(preprocessed_inputs)
return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
def test_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_128_explicit_padding(
self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape, use_explicit_padding=True)
def test_returns_correct_shapes_with_dynamic_inputs(
self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 64, 64, 32),
(2, 64, 64, 96),
(2, 32, 32, 96),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 32, 32, 144),
(2, 32, 32, 24),
(2, 32, 32, 144),
(2, 16, 16, 144),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 16, 16, 192),
(2, 16, 16, 32),
(2, 16, 16, 192),
(2, 8, 8, 192),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 64),
(2, 8, 8, 384),
(2, 8, 8, 384),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 8, 8, 576),
(2, 8, 8, 96),
(2, 8, 8, 576),
(2, 4, 4, 576),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 160),
(2, 4, 4, 960),
(2, 4, 4, 960),
(2, 4, 4, 320),
(2, 4, 4, 1280)]
self._check_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
expected_feature_map_shape = [(2, 150, 150, 32),
(2, 150, 150, 96),
(2, 75, 75, 96),
(2, 75, 75, 24),
(2, 75, 75, 144),
(2, 75, 75, 144),
(2, 75, 75, 24),
(2, 75, 75, 144),
(2, 38, 38, 144),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 19, 19, 192),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 64),
(2, 19, 19, 384),
(2, 19, 19, 384),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 19, 19, 576),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 19, 19, 576),
(2, 19, 19, 96),
(2, 19, 19, 576),
(2, 10, 10, 576),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 160),
(2, 10, 10, 960),
(2, 10, 10, 960),
(2, 10, 10, 320),
(2, 10, 10, 1280)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape)
def test_returns_correct_shapes_enforcing_min_depth(
self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
expected_feature_map_shape = [(2, 150, 150, 32),
(2, 150, 150, 192),
(2, 75, 75, 192),
(2, 75, 75, 32),
(2, 75, 75, 192),
(2, 75, 75, 192),
(2, 75, 75, 32),
(2, 75, 75, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 38, 38, 192),
(2, 38, 38, 32),
(2, 38, 38, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 19, 19, 192),
(2, 19, 19, 32),
(2, 19, 19, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 192),
(2, 10, 10, 192),
(2, 10, 10, 32),
(2, 10, 10, 32)]
self._check_returns_correct_shape(
2, image_height, image_width, depth_multiplier,
expected_feature_map_shape, min_depth=32)
def test_hyperparam_override(self):
hyperparams = self._build_conv_hyperparams()
model = mobilenet_v2.mobilenet_v2(
batchnorm_training=True,
conv_hyperparams=hyperparams,
weights=None,
use_explicit_padding=False,
alpha=1.0,
min_depth=32,
include_top=False)
hyperparams.params()
bn_layer = model.get_layer(name='block_5_project_BN')
self.assertAllClose(bn_layer.momentum, 0.2)
self.assertAllClose(bn_layer.epsilon, 0.1)
def test_variable_count(self):
depth_multiplier = 1
variables = self._get_variables(depth_multiplier)
self.assertEqual(len(variables), 260)
if __name__ == '__main__':
tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment