Unverified Commit 74a03640 authored by vivek rathod, committed by GitHub

Merge pull request #2631 from tombstone/feature_extractors_update

feature extractor and model builder update.
parents ff88581a 3237c080
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.models.ssd_inception_v3_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_inception_v3_feature_extractor


class SsdInceptionV3FeatureExtractorTest(
    ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):

  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                is_training=True, batch_norm_trainable=True):
    """Constructs a SsdInceptionV3FeatureExtractor.

    Args:
      depth_multiplier: float depth multiplier for feature extractor
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      is_training: whether the network is in training mode.
      batch_norm_trainable: Whether to update batch norm parameters during
        training or not.

    Returns:
      an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor.
    """
    min_depth = 32
    conv_hyperparams = {}
    return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, batch_norm_trainable)

  def test_extract_features_returns_correct_shapes_128(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 13, 13, 288), (4, 6, 6, 768),
                                  (4, 2, 2, 2048), (4, 1, 1, 512),
                                  (4, 1, 1, 256), (4, 1, 1, 128)]
    self.check_extract_features_returns_correct_shape(
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_299(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 35, 35, 288), (4, 17, 17, 768),
                                  (4, 8, 8, 2048), (4, 4, 4, 512),
                                  (4, 2, 2, 256), (4, 1, 1, 128)]
    self.check_extract_features_returns_correct_shape(
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 0.5**12
    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 35, 35, 128), (4, 17, 17, 128),
                                  (4, 8, 8, 192), (4, 4, 4, 32),
                                  (4, 2, 2, 32), (4, 1, 1, 32)]
    self.check_extract_features_returns_correct_shape(
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 1.0
    pad_to_multiple = 32
    expected_feature_map_shape = [(4, 37, 37, 288), (4, 18, 18, 768),
                                  (4, 8, 8, 2048), (4, 4, 4, 512),
                                  (4, 2, 2, 256), (4, 1, 1, 128)]
    self.check_extract_features_returns_correct_shape(
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_raises_error_with_invalid_image_size(self):
    image_height = 32
    image_width = 32
    depth_multiplier = 1.0
    pad_to_multiple = 1
    self.check_extract_features_raises_error_with_invalid_image_size(
        image_height, image_width, depth_multiplier, pad_to_multiple)

  def test_preprocess_returns_correct_value_range(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1
    pad_to_multiple = 1
    test_image = np.random.rand(4, image_height, image_width, 3)
    feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                       pad_to_multiple)
    preprocessed_image = feature_extractor.preprocess(test_image)
    self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))

  def test_variables_only_created_in_scope(self):
    depth_multiplier = 1
    pad_to_multiple = 1
    scope_name = 'InceptionV3'
    self.check_feature_extractor_variables_under_scope(
        depth_multiplier, pad_to_multiple, scope_name)


if __name__ == '__main__':
  tf.test.main()
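
For context on the expected shapes in the tests above: the 288, 768 and 2048 channel depths match Inception V3's Mixed_5d, Mixed_6e and Mixed_7c endpoints, and the trailing 512, 256 and 128 maps are the extra SSD feature maps added on top of the backbone. With a tiny depth_multiplier every layer is floored at min_depth = 32, which is what the enforcing_min_depth test exercises. A minimal sketch of that flooring rule, assuming the usual max(int(depth * multiplier), min_depth) convention (scaled_depth is a hypothetical helper, not part of the repository):

# Hypothetical helper illustrating the min-depth flooring assumed above;
# not the repository's implementation.
def scaled_depth(depth, depth_multiplier, min_depth):
  return max(int(depth * depth_multiplier), min_depth)

# With depth_multiplier = 0.5**12 and min_depth = 32, the extra SSD layers
# (512, 256, 128) all collapse to 32 channels, matching the expected shapes.
assert [scaled_depth(d, 0.5**12, 32) for d in (512, 256, 128)] == [32, 32, 32]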

ssd_mobilenet_v1_feature_extractor.py
@@ -19,6 +19,7 @@ import tensorflow as tf

 from object_detection.meta_architectures import ssd_meta_arch
 from object_detection.models import feature_map_generators
+from object_detection.utils import ops
 from nets import mobilenet_v1

 slim = tf.contrib.slim
@@ -28,20 +29,31 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
   """SSD Feature Extractor using MobilenetV1 features."""

   def __init__(self,
+               is_training,
                depth_multiplier,
                min_depth,
+               pad_to_multiple,
                conv_hyperparams,
+               batch_norm_trainable=True,
                reuse_weights=None):
     """MobileNetV1 Feature Extractor for SSD Models.

     Args:
+      is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
       min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
       conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
+      batch_norm_trainable: Whether to update batch norm parameters during
+        training or not. When training with a small batch size
+        (e.g. 1), it is desirable to disable batch norm update and use
+        pretrained batch norm params.
       reuse_weights: Whether to reuse variables. Default is None.
     """
     super(SSDMobileNetV1FeatureExtractor, self).__init__(
-        depth_multiplier, min_depth, conv_hyperparams, reuse_weights)
+        is_training, depth_multiplier, min_depth, pad_to_multiple,
+        conv_hyperparams, batch_norm_trainable, reuse_weights)

   def preprocess(self, resized_inputs):
     """SSD preprocessing.
@@ -83,19 +95,20 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     with tf.control_dependencies([shape_assert]):
       with slim.arg_scope(self._conv_hyperparams):
-        with tf.variable_scope('MobilenetV1',
-                               reuse=self._reuse_weights) as scope:
-          _, image_features = mobilenet_v1.mobilenet_v1_base(
-              preprocessed_inputs,
-              final_endpoint='Conv2d_13_pointwise',
-              min_depth=self._min_depth,
-              depth_multiplier=self._depth_multiplier,
-              scope=scope)
-          feature_maps = feature_map_generators.multi_resolution_feature_maps(
-              feature_map_layout=feature_map_layout,
-              depth_multiplier=self._depth_multiplier,
-              min_depth=self._min_depth,
-              insert_1x1_conv=True,
-              image_features=image_features)
+        with slim.arg_scope([slim.batch_norm], fused=False):
+          with tf.variable_scope('MobilenetV1',
+                                 reuse=self._reuse_weights) as scope:
+            _, image_features = mobilenet_v1.mobilenet_v1_base(
+                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
+                final_endpoint='Conv2d_13_pointwise',
+                min_depth=self._min_depth,
+                depth_multiplier=self._depth_multiplier,
+                scope=scope)
+            feature_maps = feature_map_generators.multi_resolution_feature_maps(
+                feature_map_layout=feature_map_layout,
+                depth_multiplier=self._depth_multiplier,
+                min_depth=self._min_depth,
+                insert_1x1_conv=True,
+                image_features=image_features)

     return feature_maps.values()
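
Two behavioral notes on the change above: ops.pad_to_multiple zero-pads the input's height and width up to the nearest multiple of self._pad_to_multiple before the base network runs, and the slim.arg_scope([slim.batch_norm], fused=False) wrapper disables fused batch norm, which the new test_nofused_batchnorm test below verifies. A rough NumPy sketch of the padding arithmetic, as an illustration rather than the repository's TensorFlow implementation:

import math

import numpy as np


def pad_to_multiple_sketch(images, multiple):
  """Zero-pads a [batch, height, width, channels] array on the bottom/right
  so that height and width become multiples of `multiple` (illustrative)."""
  _, height, width, _ = images.shape
  padded_height = int(math.ceil(height / float(multiple))) * multiple
  padded_width = int(math.ceil(width / float(multiple))) * multiple
  return np.pad(images,
                [(0, 0), (0, padded_height - height),
                 (0, padded_width - width), (0, 0)],
                mode='constant')

# e.g. 299x299 inputs padded to a multiple of 32 become 320x320, which is why
# the pad_to_multiple tests in this commit expect 37x37 (Inception V3) and
# 20x20 (MobileNet V1) first feature maps instead of 35x35 and 19x19.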

ssd_mobilenet_v1_feature_extractor_test.py
@@ -20,75 +20,120 @@ import tensorflow as tf

 from object_detection.models import ssd_feature_extractor_test
 from object_detection.models import ssd_mobilenet_v1_feature_extractor

+slim = tf.contrib.slim
+

 class SsdMobilenetV1FeatureExtractorTest(
     ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):

-  def _create_feature_extractor(self, depth_multiplier):
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+                                is_training=True, batch_norm_trainable=True):
     """Constructs a new feature extractor.

     Args:
       depth_multiplier: float depth multiplier for feature extractor
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      is_training: whether the network is in training mode.
+      batch_norm_trainable: Whether to update batch norm parameters during
+        training or not.

     Returns:
       an ssd_meta_arch.SSDFeatureExtractor object.
     """
     min_depth = 32
-    conv_hyperparams = {}
+    with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc:
+      conv_hyperparams = sc
     return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
-        depth_multiplier, min_depth, conv_hyperparams)
+        is_training, depth_multiplier, min_depth, pad_to_multiple,
+        conv_hyperparams, batch_norm_trainable)

   def test_extract_features_returns_correct_shapes_128(self):
     image_height = 128
     image_width = 128
     depth_multiplier = 1.0
+    pad_to_multiple = 1
     expected_feature_map_shape = [(4, 8, 8, 512), (4, 4, 4, 1024),
                                   (4, 2, 2, 512), (4, 1, 1, 256),
                                   (4, 1, 1, 256), (4, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
-        image_height, image_width, depth_multiplier, expected_feature_map_shape)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)

   def test_extract_features_returns_correct_shapes_299(self):
     image_height = 299
     image_width = 299
     depth_multiplier = 1.0
+    pad_to_multiple = 1
     expected_feature_map_shape = [(4, 19, 19, 512), (4, 10, 10, 1024),
                                   (4, 5, 5, 512), (4, 3, 3, 256),
                                   (4, 2, 2, 256), (4, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
-        image_height, image_width, depth_multiplier, expected_feature_map_shape)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)

   def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
     image_height = 299
     image_width = 299
     depth_multiplier = 0.5**12
+    pad_to_multiple = 1
     expected_feature_map_shape = [(4, 19, 19, 32), (4, 10, 10, 32),
                                   (4, 5, 5, 32), (4, 3, 3, 32),
                                   (4, 2, 2, 32), (4, 1, 1, 32)]
     self.check_extract_features_returns_correct_shape(
-        image_height, image_width, depth_multiplier, expected_feature_map_shape)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)

+  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
+    image_height = 299
+    image_width = 299
+    depth_multiplier = 1.0
+    pad_to_multiple = 32
+    expected_feature_map_shape = [(4, 20, 20, 512), (4, 10, 10, 1024),
+                                  (4, 5, 5, 512), (4, 3, 3, 256),
+                                  (4, 2, 2, 256), (4, 1, 1, 128)]
+    self.check_extract_features_returns_correct_shape(
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)
+
   def test_extract_features_raises_error_with_invalid_image_size(self):
     image_height = 32
     image_width = 32
     depth_multiplier = 1.0
+    pad_to_multiple = 1
     self.check_extract_features_raises_error_with_invalid_image_size(
-        image_height, image_width, depth_multiplier)
+        image_height, image_width, depth_multiplier, pad_to_multiple)

   def test_preprocess_returns_correct_value_range(self):
     image_height = 128
     image_width = 128
     depth_multiplier = 1
+    pad_to_multiple = 1
     test_image = np.random.rand(4, image_height, image_width, 3)
-    feature_extractor = self._create_feature_extractor(depth_multiplier)
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
     preprocessed_image = feature_extractor.preprocess(test_image)
     self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))

   def test_variables_only_created_in_scope(self):
     depth_multiplier = 1
+    pad_to_multiple = 1
     scope_name = 'MobilenetV1'
-    self.check_feature_extractor_variables_under_scope(depth_multiplier,
-                                                       scope_name)
+    self.check_feature_extractor_variables_under_scope(
+        depth_multiplier, pad_to_multiple, scope_name)
+
+  def test_nofused_batchnorm(self):
+    image_height = 40
+    image_width = 40
+    depth_multiplier = 1
+    pad_to_multiple = 1
+    image_placeholder = tf.placeholder(tf.float32,
+                                       [1, image_height, image_width, 3])
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
+    preprocessed_image = feature_extractor.preprocess(image_placeholder)
+    _ = feature_extractor.extract_features(preprocessed_image)
+    self.assertFalse(any(op.type == 'FusedBatchNorm'
+                         for op in tf.get_default_graph().get_operations()))

 if __name__ == '__main__':
   tf.test.main()
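
Putting the updated constructor signature together with the test helper above, a hedged usage sketch (the argument values are illustrative, not taken from any shipped config):

import tensorflow as tf

from object_detection.models import ssd_mobilenet_v1_feature_extractor

slim = tf.contrib.slim

# Minimal conv hyperparameter scope, mirroring _create_feature_extractor in
# the test above.
with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc:
  conv_hyperparams = sc

feature_extractor = ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
    is_training=True,
    depth_multiplier=1.0,
    min_depth=32,
    pad_to_multiple=32,
    conv_hyperparams=conv_hyperparams,
    batch_norm_trainable=False)  # freeze batch norm updates, e.g. for batch size 1

images = tf.placeholder(tf.float32, [1, 300, 300, 3])
preprocessed = feature_extractor.preprocess(images)
feature_maps = feature_extractor.extract_features(preprocessed)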