"docs/source/examples/vscode:/vscode.git/clone" did not exist on "ba78cbdcd0177db9d6ec0947ddf81b5157f3535c"
Commit decbad8a authored by Zhichao Lu's avatar Zhichao Lu Committed by pkulzc
Browse files

Add option to override base feature extractor hyperparams in SSD models. This...

Add option to override base feature extractor hyperparams in SSD models. This would allow us to use the same set of hyperparams for the complete feature extractor (base + new layers) if desired.

PiperOrigin-RevId: 191787921
parent 45ecc0f9
......@@ -17,6 +17,7 @@
import tensorflow as tf
from object_detection.protos import hyperparams_pb2
from object_detection.utils import context_manager
slim = tf.contrib.slim
......@@ -66,16 +67,19 @@ def build(hyperparams_config, is_training):
hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
affected_ops = [slim.fully_connected]
def scope_fn():
with slim.arg_scope(
affected_ops,
weights_regularizer=_build_regularizer(
hyperparams_config.regularizer),
weights_initializer=_build_initializer(
hyperparams_config.initializer),
activation_fn=_build_activation_fn(hyperparams_config.activation),
normalizer_fn=batch_norm,
normalizer_params=batch_norm_params) as sc:
return sc
with (slim.arg_scope([slim.batch_norm], **batch_norm_params)
if batch_norm_params is not None else
context_manager.IdentityContextManager()):
with slim.arg_scope(
affected_ops,
weights_regularizer=_build_regularizer(
hyperparams_config.regularizer),
weights_initializer=_build_initializer(
hyperparams_config.initializer),
activation_fn=_build_activation_fn(hyperparams_config.activation),
normalizer_fn=batch_norm) as sc:
return sc
return scope_fn
......@@ -170,6 +174,9 @@ def _build_batch_norm_params(batch_norm, is_training):
'center': batch_norm.center,
'scale': batch_norm.scale,
'epsilon': batch_norm.epsilon,
# Remove is_training parameter from here and deprecate it in the proto
# once we refactor Faster RCNN models to set is_training through an outer
# arg_scope in the meta architecture.
'is_training': is_training and batch_norm.train,
}
return batch_norm_params
......@@ -26,11 +26,11 @@ from object_detection.protos import hyperparams_pb2
slim = tf.contrib.slim
class HyperparamsBuilderTest(tf.test.TestCase):
def _get_scope_key(op):
return getattr(op, '_key_op', str(op))
# TODO(rathodv): Make this a public api in slim arg_scope.py.
def _get_scope_key(self, op):
return getattr(op, '_key_op', str(op))
class HyperparamsBuilderTest(tf.test.TestCase):
def test_default_arg_scope_has_conv2d_op(self):
conv_hyperparams_text_proto = """
......@@ -48,7 +48,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.conv2d) in scope)
self.assertTrue(_get_scope_key(slim.conv2d) in scope)
def test_default_arg_scope_has_separable_conv2d_op(self):
conv_hyperparams_text_proto = """
......@@ -66,7 +66,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.separable_conv2d) in scope)
self.assertTrue(_get_scope_key(slim.separable_conv2d) in scope)
def test_default_arg_scope_has_conv2d_transpose_op(self):
conv_hyperparams_text_proto = """
......@@ -84,7 +84,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.conv2d_transpose) in scope)
self.assertTrue(_get_scope_key(slim.conv2d_transpose) in scope)
def test_explicit_fc_op_arg_scope_has_fully_connected_op(self):
conv_hyperparams_text_proto = """
......@@ -103,7 +103,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.fully_connected) in scope)
self.assertTrue(_get_scope_key(slim.fully_connected) in scope)
def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
conv_hyperparams_text_proto = """
......@@ -166,7 +166,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
regularizer = conv_scope_arguments['weights_regularizer']
weights = np.array([1., -1, 4., 2.])
......@@ -197,9 +197,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
......@@ -229,9 +229,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=False)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
......@@ -261,9 +261,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
......@@ -286,9 +286,8 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
self.assertEqual(conv_scope_arguments['normalizer_params'], None)
def test_use_none_activation(self):
conv_hyperparams_text_proto = """
......@@ -307,7 +306,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['activation_fn'], None)
def test_use_relu_activation(self):
......@@ -327,7 +326,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
def test_use_relu_6_activation(self):
......@@ -347,7 +346,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
def _assert_variance_in_range(self, initializer, shape, variance,
......@@ -382,7 +381,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
......@@ -406,7 +405,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 40.)
......@@ -430,7 +429,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=4. / (100. + 40.))
......@@ -454,7 +453,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
......@@ -477,7 +476,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=0.49, tol=1e-1)
......@@ -500,7 +499,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=0.64, tol=1e-1)
......
......@@ -120,6 +120,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
use_depthwise = feature_extractor_config.use_depthwise
conv_hyperparams = hyperparams_builder.build(
feature_extractor_config.conv_hyperparams, is_training)
override_base_feature_extractor_hyperparams = (
feature_extractor_config.override_base_feature_extractor_hyperparams)
if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
......@@ -127,7 +129,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
return feature_extractor_class(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
def _build_ssd_model(ssd_config, is_training, add_summaries):
......
......@@ -83,6 +83,7 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
override_base_feature_extractor_hyperparams: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -154,6 +155,7 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
override_base_feature_extractor_hyperparams: true
}
box_coder {
faster_rcnn_box_coder {
......
......@@ -45,7 +45,8 @@ class SSDFeatureExtractor(object):
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""Constructor.
Args:
......@@ -55,12 +56,15 @@ class SSDFeatureExtractor(object):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
self._is_training = is_training
self._depth_multiplier = depth_multiplier
......@@ -70,6 +74,8 @@ class SSDFeatureExtractor(object):
self._reuse_weights = reuse_weights
self._use_explicit_padding = use_explicit_padding
self._use_depthwise = use_depthwise
self._override_base_feature_extractor_hyperparams = (
override_base_feature_extractor_hyperparams)
@abstractmethod
def preprocess(self, resized_inputs):
......
......@@ -17,16 +17,16 @@
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.models import ssd_mobilenet_v1_feature_extractor
from object_detection.utils import context_manager
from object_detection.utils import ops
from nets import mobilenet_v1
slim = tf.contrib.slim
class EmbeddedSSDMobileNetV1FeatureExtractor(
ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor):
class EmbeddedSSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""Embedded-friendly SSD Feature Extractor using MobilenetV1 features.
This feature extractor is similar to SSD MobileNetV1 feature extractor, and
......@@ -52,7 +52,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args:
......@@ -62,11 +63,15 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. For EmbeddedSSD it must be set to 1.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
Raises:
ValueError: upon invalid `pad_to_multiple` values.
......@@ -77,7 +82,23 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
......@@ -119,16 +140,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
'use_depthwise': self._use_depthwise,
}
with slim.arg_scope(self._conv_hyperparams_fn()):
with slim.arg_scope([slim.batch_norm], fused=False):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams
else context_manager.IdentityContextManager()):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
......
......@@ -41,7 +41,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
return (embedded_ssd_mobilenet_v1_feature_extractor.
EmbeddedSSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn))
self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True))
def test_extract_features_returns_correct_shapes_256(self):
image_height = 256
......
......@@ -36,7 +36,8 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""InceptionV2 Feature Extractor for SSD Models.
Args:
......@@ -46,15 +47,29 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
Raises:
ValueError: If `override_base_feature_extractor_hyperparams` is False.
"""
super(SSDInceptionV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
if not self._override_base_feature_extractor_hyperparams:
raise ValueError('SSD Inception V2 feature extractor always uses'
'scope returned by `conv_hyperparams_fn` for both the '
'base feature extractor and the additional layers '
'added since there is no arg_scope defined for the base '
'feature extractor.')
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......
......@@ -40,7 +40,8 @@ class SsdInceptionV2FeatureExtractorTest(
min_depth = 32
return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn)
self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
......
......@@ -36,7 +36,8 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""InceptionV3 Feature Extractor for SSD Models.
Args:
......@@ -46,15 +47,30 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
Raises:
ValueError: If `override_base_feature_extractor_hyperparams` is False.
"""
super(SSDInceptionV3FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
if not self._override_base_feature_extractor_hyperparams:
raise ValueError('SSD Inception V3 feature extractor always uses'
'scope returned by `conv_hyperparams_fn` for both the '
'base feature extractor and the additional layers '
'added since there is no arg_scope defined for the base '
'feature extractor.')
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......
......@@ -40,7 +40,8 @@ class SsdInceptionV3FeatureExtractorTest(
min_depth = 32
return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn)
self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
......
......@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import mobilenet_v1
......@@ -37,7 +38,8 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""MobileNetV1 Feature Extractor for SSD Models.
Args:
......@@ -47,16 +49,21 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -99,15 +106,18 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
reuse=self._reuse_weights) as scope:
with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams
else context_manager.IdentityContextManager()):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
......
......@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets.mobilenet import mobilenet
......@@ -38,7 +39,8 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""MobileNetV2 Feature Extractor for SSD Models.
Mobilenet v2 (experimental), designed by sandler@. More details can be found
......@@ -51,15 +53,20 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDMobileNetV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -102,15 +109,18 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
slim.arg_scope(
[mobilenet.depth_multiplier], min_depth=self._min_depth):
# TODO(b/68150321): Enable fused batch norm once quantization
# supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v2.mobilenet_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='layer_19',
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams else
context_manager.IdentityContextManager()):
# TODO(b/68150321): Enable fused batch norm once quantization
# supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v2.mobilenet_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='layer_19',
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(b/68150321): Enable fused batch norm once quantization
# supports it.
......
......@@ -21,6 +21,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import resnet_v1
......@@ -42,7 +43,8 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
fpn_scope_name,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""SSD FPN feature extractor based on Resnet v1 architecture.
Args:
......@@ -53,7 +55,8 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
resnet_base_fn: base resnet network to use.
resnet_scope_name: scope name under which to construct resnet
fpn_scope_name: scope name under which to construct the feature pyramid
......@@ -62,13 +65,17 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
Raises:
ValueError: On supplying invalid arguments for unused arguments.
"""
super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding)
conv_hyperparams_fn, reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
if self._depth_multiplier != 1.0:
raise ValueError('Only depth 1.0 is supported, found: {}'.
format(self._depth_multiplier))
......@@ -128,15 +135,18 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
with tf.variable_scope(
self._resnet_scope_name, reuse=self._reuse_weights) as scope:
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
_, image_features = self._resnet_base_fn(
inputs=ops.pad_to_multiple(preprocessed_inputs,
self._pad_to_multiple),
num_classes=None,
is_training=None,
global_pool=False,
output_stride=None,
store_non_strided_activations=True,
scope=scope)
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams else
context_manager.IdentityContextManager()):
_, image_features = self._resnet_base_fn(
inputs=ops.pad_to_multiple(preprocessed_inputs,
self._pad_to_multiple),
num_classes=None,
is_training=None,
global_pool=False,
output_stride=None,
store_non_strided_activations=True,
scope=scope)
image_features = self._filter_features(image_features)
last_feature_map = image_features['block4']
with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
......@@ -167,28 +177,36 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""Resnet50 v1 FPN Feature Extractor for SSD Models.
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""SSD Resnet50 V1 FPN feature extractor based on Resnet v1 architecture.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
reuse_weights, use_explicit_padding)
conv_hyperparams_fn, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
......@@ -198,28 +216,36 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""Resnet101 v1 FPN Feature Extractor for SSD Models.
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""SSD Resnet101 V1 FPN feature extractor based on Resnet v1 architecture.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
reuse_weights, use_explicit_padding)
conv_hyperparams_fn, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
......@@ -229,25 +255,33 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""Resnet152 v1 FPN Feature Extractor for SSD Models.
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""SSD Resnet152 V1 FPN feature extractor based on Resnet v1 architecture.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
reuse_weights, use_explicit_padding)
conv_hyperparams_fn, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
......@@ -99,6 +99,14 @@ message SsdFeatureExtractor {
// of the base feature extractor.
optional Hyperparams conv_hyperparams = 4;
// Normally, SSD feature extractors are constructed by reusing an existing
// base feature extractor (that has its own hyperparams) and adding new layers
// on top of it. `conv_hyperparams` above normally applies only to the new
// layers while base feature extractor uses its own default hyperparams. If
// this value is set to true, the base feature extractor's hyperparams will be
// overridden with the `conv_hyperparams`.
optional bool override_base_feature_extractor_hyperparams = 9 [default = false];
// The nearest multiple to zero-pad the input height and width dimensions to.
// For example, if pad_to_multiple = 2, input dimensions are zero-padded
// until the resulting dimensions are even.
......
......@@ -98,6 +98,7 @@ model {
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
......
......@@ -98,6 +98,7 @@ model {
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
......
......@@ -98,6 +98,7 @@ model {
epsilon: 0.01,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment