Commit 05584085 authored by pkulzc's avatar pkulzc Committed by Jonathan Huang

Merged commit includes the following changes: (#6315)

236813471  by lzc:

    Internal change.

--
236507310  by lzc:

    Fix preprocess.random_resize_method config type issue. The target height
    and width are passed as "size" to tf.image.resize_images, which accepts
    only integers.
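
    For context, a minimal sketch of the constraint behind this fix
    (illustrative values, not the repo's preprocessing code):

    import tensorflow as tf

    image = tf.random_uniform([1, 480, 640, 3])
    # tf.image.resize_images requires `size` to be integer-valued, which is
    # why RandomResizeMethod's target fields change from float to int32 below.
    target_height, target_width = 320, 320
    resized = tf.image.resize_images(
        image, size=[target_height, target_width],
        method=tf.image.ResizeMethod.BILINEAR)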

--
236409989  by Zhichao Lu:

    Config export_to_tpu from function parameter instead of HParams for TPU inference.

--
236403186  by Zhichao Lu:

    Make graph file names optional arguments.

--
236237072  by Zhichao Lu:

    Minor bugfix for keyword args.

--
236209602  by Zhichao Lu:

    Add support for PartitionedVariable to get_variables_available_in_checkpoint.
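
    For illustration, a hedged sketch of what such support involves (the
    helper below is hypothetical, not the repo's actual code): a
    PartitionedVariable is iterable over its concrete partition variables, so
    name-based checkpoint matching has to expand it first.

    import tensorflow as tf

    def expand_partitioned(variables):
      # Hypothetical helper: flatten each PartitionedVariable into its
      # underlying partition variables before comparing names and shapes
      # against the checkpoint's contents.
      flattened = []
      for variable in variables:
        if isinstance(variable, tf.PartitionedVariable):
          flattened.extend(list(variable))  # iteration yields the partitions
        else:
          flattened.append(variable)
      return flattened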

--
235828658  by Zhichao Lu:

    Automatically stop evaluation jobs when training is finished.

--
235817964  by Zhichao Lu:

    Add an optional process_metrics_fn callback to eval_util; it is called
    with the evaluation results each time an evaluation completes.
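
    A hypothetical example of such a callback (the exact signature is not
    shown in this log; assumed here to receive a dict of metric names to
    values):

    import tensorflow as tf

    def process_metrics_fn(eval_metrics):
      # Log every metric once an evaluation round completes.
      for name, value in sorted(eval_metrics.items()):
        tf.logging.info('%s: %f', name, value)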

--
235788721  by lzc:

    Fix yml file tf runtime...
parent a5db4420
@@ -29,7 +29,7 @@ from object_detection.utils import test_case

 class SsdFeatureExtractorTestBase(test_case.TestCase):

-  def _build_conv_hyperparams(self):
+  def _build_conv_hyperparams(self, add_batch_norm=True):
     conv_hyperparams = hyperparams_pb2.Hyperparams()
     conv_hyperparams_text_proto = """
       activation: RELU_6
@@ -41,10 +41,14 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
         truncated_normal_initializer {
         }
       }
-      batch_norm {
-        scale: false
-      }
     """
+    if add_batch_norm:
+      batch_norm_proto = """
+        batch_norm {
+          scale: false
+        }
+      """
+      conv_hyperparams_text_proto += batch_norm_proto
     text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
     return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
......
@@ -13,41 +13,69 @@
 # limitations under the License.
 # ==============================================================================

-"""Tests for ssd_mobilenet_v1_feature_extractor."""
+"""Tests for SSD Mobilenet V1 feature extractors.
+
+By using parameterized test decorator, this test serves for both Slim-based and
+Keras-based Mobilenet V1 feature extractors in SSD.
+"""
+from absl.testing import parameterized
+
 import numpy as np
 import tensorflow as tf

 from object_detection.models import ssd_feature_extractor_test
 from object_detection.models import ssd_mobilenet_v1_feature_extractor
+from object_detection.models import ssd_mobilenet_v1_keras_feature_extractor

 slim = tf.contrib.slim


+@parameterized.parameters(
+    {'use_keras': False},
+    {'use_keras': True},
+)
 class SsdMobilenetV1FeatureExtractorTest(
     ssd_feature_extractor_test.SsdFeatureExtractorTestBase):

   def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
-                                is_training=True, use_explicit_padding=False):
+                                use_explicit_padding=False, is_training=False,
+                                use_keras=False):
     """Constructs a new feature extractor.

     Args:
       depth_multiplier: float depth multiplier for feature extractor
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      is_training: whether the network is in training mode.
       use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
         inputs so that the output dimensions are the same as if 'SAME' padding
         were used.
+      is_training: whether the network is in training mode.
+      use_keras: if True builds a keras-based feature extractor, if False builds
+        a slim-based one.

     Returns:
       an ssd_meta_arch.SSDFeatureExtractor object.
     """
     min_depth = 32
-    return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
-        is_training, depth_multiplier, min_depth, pad_to_multiple,
-        self.conv_hyperparams_fn,
-        use_explicit_padding=use_explicit_padding)
-
-  def test_extract_features_returns_correct_shapes_128(self):
+    if use_keras:
+      return (ssd_mobilenet_v1_keras_feature_extractor.
+              SSDMobileNetV1KerasFeatureExtractor(
+                  is_training=is_training,
+                  depth_multiplier=depth_multiplier,
+                  min_depth=min_depth,
+                  pad_to_multiple=pad_to_multiple,
+                  conv_hyperparams=self._build_conv_hyperparams(
+                      add_batch_norm=False),
+                  freeze_batchnorm=False,
+                  inplace_batchnorm_update=False,
+                  use_explicit_padding=use_explicit_padding,
+                  name='MobilenetV1'))
+    else:
+      return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
+          is_training, depth_multiplier, min_depth, pad_to_multiple,
+          self.conv_hyperparams_fn,
+          use_explicit_padding=use_explicit_padding)
+
+  def test_extract_features_returns_correct_shapes_128(self, use_keras):
     image_height = 128
     image_width = 128
     depth_multiplier = 1.0
@@ -57,12 +85,14 @@ class SsdMobilenetV1FeatureExtractorTest(
                                   (2, 1, 1, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=False)
+        expected_feature_map_shape, use_explicit_padding=False,
+        use_keras=use_keras)
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=True)
+        expected_feature_map_shape, use_explicit_padding=True,
+        use_keras=use_keras)

-  def test_extract_features_returns_correct_shapes_299(self):
+  def test_extract_features_returns_correct_shapes_299(self, use_keras):
     image_height = 299
     image_width = 299
     depth_multiplier = 1.0
@@ -72,12 +102,14 @@ class SsdMobilenetV1FeatureExtractorTest(
                                   (2, 2, 2, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=False)
+        expected_feature_map_shape, use_explicit_padding=False,
+        use_keras=use_keras)
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=True)
+        expected_feature_map_shape, use_explicit_padding=True,
+        use_keras=use_keras)

-  def test_extract_features_with_dynamic_image_shape(self):
+  def test_extract_features_with_dynamic_image_shape(self, use_keras):
     image_height = 128
     image_width = 128
     depth_multiplier = 1.0
@@ -87,12 +119,15 @@ class SsdMobilenetV1FeatureExtractorTest(
                                   (2, 1, 1, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=False)
+        expected_feature_map_shape, use_explicit_padding=False,
+        use_keras=use_keras)
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=True)
+        expected_feature_map_shape, use_explicit_padding=True,
+        use_keras=use_keras)

-  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
+  def test_extract_features_returns_correct_shapes_enforcing_min_depth(
+      self, use_keras):
     image_height = 299
     image_width = 299
     depth_multiplier = 0.5**12
@@ -102,12 +137,15 @@ class SsdMobilenetV1FeatureExtractorTest(
                                   (2, 2, 2, 32), (2, 1, 1, 32)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=False)
+        expected_feature_map_shape, use_explicit_padding=False,
+        use_keras=use_keras)
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=True)
+        expected_feature_map_shape, use_explicit_padding=True,
+        use_keras=use_keras)

-  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
+  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
+      self, use_keras):
     image_height = 299
     image_width = 299
     depth_multiplier = 1.0
@@ -117,48 +155,63 @@ class SsdMobilenetV1FeatureExtractorTest(
                                   (2, 2, 2, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=False)
+        expected_feature_map_shape, use_explicit_padding=False,
+        use_keras=use_keras)
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=True)
+        expected_feature_map_shape, use_explicit_padding=True,
+        use_keras=use_keras)

-  def test_extract_features_raises_error_with_invalid_image_size(self):
+  def test_extract_features_raises_error_with_invalid_image_size(
+      self, use_keras):
     image_height = 32
     image_width = 32
     depth_multiplier = 1.0
     pad_to_multiple = 1
     self.check_extract_features_raises_error_with_invalid_image_size(
-        image_height, image_width, depth_multiplier, pad_to_multiple)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        use_keras=use_keras)

-  def test_preprocess_returns_correct_value_range(self):
+  def test_preprocess_returns_correct_value_range(self, use_keras):
     image_height = 128
     image_width = 128
     depth_multiplier = 1
     pad_to_multiple = 1
     test_image = np.random.rand(2, image_height, image_width, 3)
     feature_extractor = self._create_feature_extractor(depth_multiplier,
-                                                       pad_to_multiple)
+                                                       pad_to_multiple,
+                                                       use_keras=use_keras)
     preprocessed_image = feature_extractor.preprocess(test_image)
     self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))

-  def test_variables_only_created_in_scope(self):
+  def test_variables_only_created_in_scope(self, use_keras):
     depth_multiplier = 1
     pad_to_multiple = 1
     scope_name = 'MobilenetV1'
     self.check_feature_extractor_variables_under_scope(
-        depth_multiplier, pad_to_multiple, scope_name)
+        depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras)

-  def test_has_fused_batchnorm(self):
+  def test_variable_count(self, use_keras):
+    depth_multiplier = 1
+    pad_to_multiple = 1
+    variables = self.get_feature_extractor_variables(
+        depth_multiplier, pad_to_multiple, use_keras=use_keras)
+    self.assertEqual(len(variables), 151)
+
+  def test_has_fused_batchnorm(self, use_keras):
     image_height = 40
     image_width = 40
     depth_multiplier = 1
     pad_to_multiple = 1
     image_placeholder = tf.placeholder(tf.float32,
                                        [1, image_height, image_width, 3])
-    feature_extractor = self._create_feature_extractor(depth_multiplier,
-                                                       pad_to_multiple)
+    feature_extractor = self._create_feature_extractor(
+        depth_multiplier, pad_to_multiple, use_keras=use_keras)
     preprocessed_image = feature_extractor.preprocess(image_placeholder)
-    _ = feature_extractor.extract_features(preprocessed_image)
+    if use_keras:
+      _ = feature_extractor(preprocessed_image)
+    else:
+      _ = feature_extractor.extract_features(preprocessed_image)
     self.assertTrue(any(op.type == 'FusedBatchNorm'
                         for op in tf.get_default_graph().get_operations()))
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for Keras MobilenetV1 features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.models.keras_models import mobilenet_v1
from object_detection.utils import ops
from object_detection.utils import shape_utils
slim = tf.contrib.slim
class SSDMobileNetV1KerasFeatureExtractor(
ssd_meta_arch.SSDKerasFeatureExtractor):
"""SSD Feature Extractor using Keras MobilenetV1 features."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
use_explicit_padding=False,
use_depthwise=False,
override_base_feature_extractor_hyperparams=False,
name=None):
"""Keras MobileNetV1 Feature Extractor for SSD Models.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
super(SSDMobileNetV1KerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
self._feature_map_layout = {
'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
'', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
}
self._mobilenet_v1 = None
self._feature_map_generator = None
def build(self, input_shape):
full_mobilenet_v1 = mobilenet_v1.mobilenet_v1(
batchnorm_training=(self._is_training and not self._freeze_batchnorm),
conv_hyperparams=(self._conv_hyperparams
if self._override_base_feature_extractor_hyperparams
else None),
weights=None,
use_explicit_padding=self._use_explicit_padding,
alpha=self._depth_multiplier,
min_depth=self._min_depth,
include_top=False)
conv2d_11_pointwise = full_mobilenet_v1.get_layer(
name='conv_pw_11_relu').output
conv2d_13_pointwise = full_mobilenet_v1.get_layer(
name='conv_pw_13_relu').output
self._mobilenet_v1 = tf.keras.Model(
inputs=full_mobilenet_v1.inputs,
outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
self._feature_map_generator = (
feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=self._feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
is_training=self._is_training,
conv_hyperparams=self._conv_hyperparams,
freeze_batchnorm=self._freeze_batchnorm,
name='FeatureMaps'))
self.built = True
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs = shape_utils.check_min_image_dim(
33, preprocessed_inputs)
image_features = self._mobilenet_v1(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
feature_maps = self._feature_map_generator({
'Conv2d_11_pointwise': image_features[0],
'Conv2d_13_pointwise': image_features[1]})
return feature_maps.values()
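
As a usage sketch, mirroring how the parameterized test earlier in this commit
constructs this extractor (here `conv_hyperparams` is assumed to be a
hyperparams_builder.KerasLayerHyperparams object and `images` a float image
batch; neither name comes from this file):

extractor = (ssd_mobilenet_v1_keras_feature_extractor.
             SSDMobileNetV1KerasFeatureExtractor(
                 is_training=False,
                 depth_multiplier=1.0,
                 min_depth=32,
                 pad_to_multiple=1,
                 conv_hyperparams=conv_hyperparams,
                 freeze_batchnorm=False,
                 inplace_batchnorm_update=False,
                 name='MobilenetV1'))
preprocessed = extractor.preprocess(images)  # scales pixels to [-1, 1]
feature_maps = extractor(preprocessed)       # Keras extractors are callable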
@@ -19,7 +19,7 @@ import tensorflow as tf

 from object_detection.meta_architectures import ssd_meta_arch
 from object_detection.models import feature_map_generators
-from object_detection.models.keras_applications import mobilenet_v2
+from object_detection.models.keras_models import mobilenet_v2
 from object_detection.utils import ops
 from object_detection.utils import shape_utils
......
@@ -53,8 +53,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-        UNUSED currently.
-      min_depth: minimum feature extractor depth. UNUSED Currently.
+      min_depth: minimum feature extractor depth.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
       conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -96,9 +95,6 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
         use_depthwise=use_depthwise,
         override_base_feature_extractor_hyperparams=
         override_base_feature_extractor_hyperparams)
-    if self._depth_multiplier != 1.0:
-      raise ValueError('Only depth 1.0 is supported, found: {}'.
-                       format(self._depth_multiplier))
     if self._use_explicit_padding is True:
       raise ValueError('Explicit padding is not a valid option.')
     self._resnet_base_fn = resnet_base_fn
@@ -150,13 +146,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     Returns:
       feature_maps: a list of tensors where the ith tensor has shape
         [batch, height_i, width_i, depth_i]
-
-    Raises:
-      ValueError: depth multiplier is not supported.
     """
-    if self._depth_multiplier != 1.0:
-      raise ValueError('Depth multiplier not supported.')
-
     preprocessed_inputs = shape_utils.check_min_image_dim(
         129, preprocessed_inputs)
@@ -174,8 +164,11 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           global_pool=False,
           output_stride=None,
           store_non_strided_activations=True,
+          min_base_depth=self._min_depth,
+          depth_multiplier=self._depth_multiplier,
           scope=scope)
       image_features = self._filter_features(image_features)
+    depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
     with slim.arg_scope(self._conv_hyperparams_fn()):
       with tf.variable_scope(self._fpn_scope_name,
                              reuse=self._reuse_weights):
@@ -185,7 +178,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           feature_block_list.append('block{}'.format(level - 1))
         fpn_features = feature_map_generators.fpn_top_down_feature_maps(
             [(key, image_features[key]) for key in feature_block_list],
-            depth=self._additional_layer_depth)
+            depth=depth_fn(self._additional_layer_depth))
         feature_maps = []
         for level in range(self._fpn_min_level, base_fpn_max_level + 1):
           feature_maps.append(
@@ -196,7 +189,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
         for i in range(base_fpn_max_level, self._fpn_max_level):
           last_feature_map = slim.conv2d(
               last_feature_map,
-              num_outputs=self._additional_layer_depth,
+              num_outputs=depth_fn(self._additional_layer_depth),
               kernel_size=[3, 3],
              stride=2,
              padding='SAME',
@@ -226,8 +219,7 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-        UNUSED currently.
-      min_depth: minimum feature extractor depth. UNUSED Currently.
+      min_depth: minimum feature extractor depth.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
       conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -284,8 +276,7 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-        UNUSED currently.
-      min_depth: minimum feature extractor depth. UNUSED Currently.
+      min_depth: minimum feature extractor depth.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
       conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -342,8 +333,7 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-        UNUSED currently.
-      min_depth: minimum feature extractor depth. UNUSED Currently.
+      min_depth: minimum feature extractor depth.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
       conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
......
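
A worked example of the depth_fn arithmetic introduced above, using the values
the new FPN tests later in this diff exercise:

depth_fn = lambda d, depth_multiplier, min_depth: max(
    int(d * depth_multiplier), min_depth)
assert depth_fn(256, depth_multiplier=0.5, min_depth=32) == 128   # scaled down
assert depth_fn(256, depth_multiplier=1.0, min_depth=320) == 320  # floor wins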
@@ -25,8 +25,7 @@ class SSDResnet50V1FeatureExtractorTest(
   """SSDResnet50v1Fpn feature extractor test."""

   def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
-                                use_explicit_padding=False):
-    min_depth = 32
+                                use_explicit_padding=False, min_depth=32):
     is_training = True
     return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
         is_training, depth_multiplier, min_depth, pad_to_multiple,
@@ -42,8 +41,7 @@ class SSDResnet101V1FeatureExtractorTest(
   """SSDResnet101v1Fpn feature extractor test."""

   def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
-                                use_explicit_padding=False):
-    min_depth = 32
+                                use_explicit_padding=False, min_depth=32):
     is_training = True
     return (
         ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor(
@@ -64,8 +62,7 @@ class SSDResnet152V1FeatureExtractorTest(
   """SSDResnet152v1Fpn feature extractor test."""

   def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
-                                use_explicit_padding=False):
-    min_depth = 32
+                                use_explicit_padding=False, min_depth=32):
     is_training = True
     return (
         ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor(
......
@@ -14,6 +14,7 @@
 # ==============================================================================

 """Tests for ssd resnet v1 FPN feature extractors."""
 import abc
+import itertools

 import numpy as np
 import tensorflow as tf
@@ -32,6 +33,14 @@ class SSDResnetFPNFeatureExtractorTestBase(
   def _fpn_scope_name(self):
     return 'fpn'

+  @abc.abstractmethod
+  def _create_feature_extractor(self,
+                                depth_multiplier,
+                                pad_to_multiple,
+                                use_explicit_padding=False,
+                                min_depth=32):
+    pass
+
   def test_extract_features_returns_correct_shapes_256(self):
     image_height = 256
     image_width = 256
@@ -56,6 +65,45 @@ class SSDResnetFPNFeatureExtractorTestBase(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
         expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_depth_multiplier(self):
image_height = 256
image_width = 256
depth_multiplier = 0.5
expected_num_channels = int(256 * depth_multiplier)
pad_to_multiple = 1
expected_feature_map_shape = [(2, 32, 32, expected_num_channels),
(2, 16, 16, expected_num_channels),
(2, 8, 8, expected_num_channels),
(2, 4, 4, expected_num_channels),
(2, 2, 2, expected_num_channels)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_min_depth(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
min_depth = 320
expected_feature_map_shape = [(2, 32, 32, min_depth),
(2, 16, 16, min_depth),
(2, 8, 8, min_depth),
(2, 4, 4, min_depth),
(2, 2, 2, min_depth)]
def graph_fn(image_tensor):
feature_extractor = self._create_feature_extractor(
depth_multiplier, pad_to_multiple, min_depth=min_depth)
return feature_extractor.extract_features(image_tensor)
image_tensor = np.random.rand(2, image_height, image_width,
3).astype(np.float32)
feature_maps = self.execute(graph_fn, [image_tensor])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shape):
self.assertAllEqual(feature_map.shape, expected_shape)
   def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
     image_height = 254
     image_width = 254
......
@@ -54,8 +54,8 @@
     "sys.path.append(\"..\")\n",
     "from object_detection.utils import ops as utils_ops\n",
     "\n",
-    "if StrictVersion(tf.__version__) \u003c StrictVersion('1.9.0'):\n",
-    "  raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')\n"
+    "if StrictVersion(tf.__version__) \u003c StrictVersion('1.12.0'):\n",
+    "  raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')\n"
    ]
   },
   {
......
@@ -108,14 +108,16 @@ class ConvolutionalBoxPredictor(box_predictor.BoxPredictor):
         feature map.

     Returns:
-      box_encodings: A list of float tensors of shape
-        [batch_size, num_anchors_i, q, code_size] representing the location of
-        the objects, where q is 1 or the number of classes. Each entry in the
-        list corresponds to a feature map in the input `image_features` list.
-      class_predictions_with_background: A list of float tensors of shape
-        [batch_size, num_anchors_i, num_classes + 1] representing the class
-        predictions for the proposals. Each entry in the list corresponds to a
-        feature map in the input `image_features` list.
+      A dictionary containing:
+        box_encodings: A list of float tensors of shape
+          [batch_size, num_anchors_i, q, code_size] representing the location of
+          the objects, where q is 1 or the number of classes. Each entry in the
+          list corresponds to a feature map in the input `image_features` list.
+        class_predictions_with_background: A list of float tensors of shape
+          [batch_size, num_anchors_i, num_classes + 1] representing the class
+          predictions for the proposals. Each entry in the list corresponds to a
+          feature map in the input `image_features` list.
+        (optional) Predictions from other heads.
     """
     predictions = {
         BOX_ENCODINGS: [],
@@ -226,8 +228,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
       kernel_size: Size of final convolution kernel.
       apply_batch_norm: Whether to apply batch normalization to conv layers in
         this predictor.
-      share_prediction_tower: Whether to share the multi-layer tower between box
-        prediction and class prediction heads.
+      share_prediction_tower: Whether to share the multi-layer tower among box
+        prediction head, class prediction head and other heads.
       use_depthwise: Whether to use depthwise separable conv2d instead of
         regular conv2d.
     """
@@ -270,9 +272,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
       inserted_layer_counter += 1
     return image_feature, inserted_layer_counter

-  def _compute_base_tower(self, tower_name_scope, image_feature, feature_index,
-                          has_different_feature_channels, target_channel,
-                          inserted_layer_counter):
+  def _compute_base_tower(self, tower_name_scope, image_feature, feature_index):
     net = image_feature
     for i in range(self._num_layers_before_predictor):
       if self._use_depthwise:
@@ -296,23 +296,18 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
     return net

   def _predict_head(self, head_name, head_obj, image_feature, box_tower_feature,
-                    feature_index, has_different_feature_channels,
-                    target_channel, inserted_layer_counter,
-                    num_predictions_per_location):
+                    feature_index, num_predictions_per_location):
     if head_name == CLASS_PREDICTIONS_WITH_BACKGROUND:
       tower_name_scope = 'ClassPredictionTower'
     else:
-      raise ValueError('Unknown head')
+      tower_name_scope = head_name + 'PredictionTower'
     if self._share_prediction_tower:
       head_tower_feature = box_tower_feature
     else:
       head_tower_feature = self._compute_base_tower(
           tower_name_scope=tower_name_scope,
           image_feature=image_feature,
-          feature_index=feature_index,
-          has_different_feature_channels=has_different_feature_channels,
-          target_channel=target_channel,
-          inserted_layer_counter=inserted_layer_counter)
+          feature_index=feature_index)
     return head_obj.predict(
         features=head_tower_feature,
         num_predictions_per_location=num_predictions_per_location)
@@ -341,13 +336,13 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
         [batch_size, num_anchors_i, num_classes + 1] representing the class
         predictions for the proposals. Each entry in the list corresponds to a
         feature map in the input `image_features` list.
-      (optional) mask_predictions: A list of float tensors of shape
+      (optional) Predictions from other heads.
+        E.g., mask_predictions: A list of float tensors of shape
         [batch_size, num_anchord_i, num_classes, mask_height, mask_width].

     Raises:
-      ValueError: If the image feature maps do not have the same number of
-        channels or if the num predictions per locations is differs between the
+      ValueError: If the num predictions per locations differs between the
         feature maps.
     """
     if len(set(num_predictions_per_location_list)) > 1:
@@ -392,10 +387,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
           box_tower_feature = self._compute_base_tower(
               tower_name_scope=box_tower_scope,
               image_feature=image_feature,
-              feature_index=feature_index,
-              has_different_feature_channels=has_different_feature_channels,
-              target_channel=target_channel,
-              inserted_layer_counter=inserted_layer_counter)
+              feature_index=feature_index)
         box_encodings = self._box_prediction_head.predict(
             features=box_tower_feature,
             num_predictions_per_location=num_predictions_per_location)
@@ -413,9 +405,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
             image_feature=image_feature,
             box_tower_feature=box_tower_feature,
             feature_index=feature_index,
-            has_different_feature_channels=has_different_feature_channels,
-            target_channel=target_channel,
-            inserted_layer_counter=inserted_layer_counter,
             num_predictions_per_location=num_predictions_per_location)
         predictions[head_name].append(prediction)
     return predictions
@@ -14,6 +14,8 @@
 # ==============================================================================

 """Tests for object_detection.predictors.convolutional_box_predictor."""

+from absl.testing import parameterized
+
 import numpy as np
 import tensorflow as tf

@@ -21,6 +23,9 @@ from google.protobuf import text_format
 from object_detection.builders import box_predictor_builder
 from object_detection.builders import hyperparams_builder
 from object_detection.predictors import convolutional_box_predictor as box_predictor
+from object_detection.predictors.heads import box_head
+from object_detection.predictors.heads import class_head
+from object_detection.predictors.heads import mask_head
 from object_detection.protos import hyperparams_pb2
 from object_detection.utils import test_case

@@ -852,5 +857,66 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
     self.assertAllEqual(objectness_predictions_shape,
                         [4, expected_num_anchors, 1])
def test_other_heads_predictions(self):
box_code_size = 4
num_classes_without_background = 3
other_head_name = 'Mask'
mask_height = 5
mask_width = 5
num_predictions_per_location = 5
def graph_fn(image_features):
box_prediction_head = box_head.WeightSharedConvolutionalBoxHead(
box_code_size)
class_prediction_head = class_head.WeightSharedConvolutionalClassHead(
num_classes_without_background + 1)
other_heads = {
other_head_name:
mask_head.WeightSharedConvolutionalMaskHead(
num_classes_without_background,
mask_height=mask_height,
mask_width=mask_width)
}
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
box_prediction_head=box_prediction_head,
class_prediction_head=class_prediction_head,
other_heads=other_heads,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=2)
box_predictions = conv_box_predictor.predict(
[image_features],
num_predictions_per_location=[num_predictions_per_location],
scope='BoxPredictor')
for key, value in box_predictions.items():
box_predictions[key] = tf.concat(value, axis=1)
assert len(box_predictions) == 3
return (box_predictions[box_predictor.BOX_ENCODINGS],
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
box_predictions[other_head_name])
batch_size = 4
feature_ht = 8
feature_wt = 8
image_features = np.random.rand(batch_size, feature_ht, feature_wt,
64).astype(np.float32)
(box_encodings, class_predictions, other_head_predictions) = self.execute(
graph_fn, [image_features])
num_anchors = feature_ht * feature_wt * num_predictions_per_location
self.assertAllEqual(box_encodings.shape,
[batch_size, num_anchors, box_code_size])
self.assertAllEqual(
class_predictions.shape,
[batch_size, num_anchors, num_classes_without_background + 1])
self.assertAllEqual(other_head_predictions.shape, [
batch_size, num_anchors, num_classes_without_background, mask_height,
mask_width
])
 if __name__ == '__main__':
   tf.test.main()
@@ -191,7 +191,69 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
     self.assertEqual(conv_box_predictor._sorted_head_names,
                      ['box_encodings', 'class_predictions_with_background'])

-  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
+  def test_use_depthwise_convolution(self):
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
conv_box_predictor = (
box_predictor_builder.build_convolutional_keras_box_predictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_conv_hyperparams(),
freeze_batchnorm=False,
inplace_batchnorm_update=False,
num_predictions_per_location_list=[5],
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4,
use_depthwise=True
))
box_predictions = conv_box_predictor([image_features])
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
objectness_predictions = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
init_op = tf.global_variables_initializer()
resolution = 32
expected_num_anchors = resolution*resolution*5
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
objectness_predictions_shape) = sess.run(
[tf.shape(box_encodings), tf.shape(objectness_predictions)],
feed_dict={image_features:
np.random.rand(4, resolution, resolution, 64)})
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
expected_variable_set = set([
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias',
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/kernel',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor_depthwise/'
'bias',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor_depthwise/'
'depthwise_kernel',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/bias',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/kernel',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor_depthwise/bias',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor_depthwise/'
'depthwise_kernel',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/bias',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/kernel'])
self.assertEqual(expected_variable_set, actual_variable_set)
self.assertEqual(conv_box_predictor._sorted_head_names,
['box_encodings', 'class_predictions_with_background'])
 if __name__ == '__main__':
   tf.test.main()
@@ -56,7 +56,20 @@ class ConvolutionalKerasBoxHeadTest(test_case.TestCase):
     box_encodings = box_prediction_head(image_feature)
     self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())

-  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
+  def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
box_prediction_head = keras_box_head.ConvolutionalBoxHead(
is_training=True,
box_code_size=4,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
box_encodings = box_prediction_head(image_feature)
self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())
 if __name__ == '__main__':
   tf.test.main()
@@ -59,7 +59,23 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
     self.assertAllEqual([64, 323, 20],
                         class_predictions.get_shape().as_list())

-  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
+  def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
class_prediction_head = keras_class_head.ConvolutionalClassHead(
is_training=True,
num_class_slots=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
class_predictions = class_prediction_head(image_feature,)
self.assertAllEqual([64, 323, 20],
class_predictions.get_shape().as_list())
 if __name__ == '__main__':
   tf.test.main()
@@ -61,7 +61,25 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
     self.assertAllEqual([64, 323, 20, 7, 7],
                         mask_predictions.get_shape().as_list())

-  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
+  def test_prediction_size_use_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
mask_prediction_head = keras_mask_head.ConvolutionalMaskHead(
is_training=True,
num_classes=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True,
mask_height=7,
mask_width=7)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
mask_predictions = mask_prediction_head(image_feature)
self.assertAllEqual([64, 323, 20, 7, 7],
mask_predictions.get_shape().as_list())
   def test_class_agnostic_prediction_size_use_depthwise_false(self):
     conv_hyperparams = self._build_conv_hyperparams()
@@ -84,7 +102,26 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
     self.assertAllEqual([64, 323, 1, 7, 7],
                         mask_predictions.get_shape().as_list())

-  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
+  def test_class_agnostic_prediction_size_use_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
mask_prediction_head = keras_mask_head.ConvolutionalMaskHead(
is_training=True,
num_classes=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True,
mask_height=7,
mask_width=7,
masks_are_class_agnostic=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
mask_predictions = mask_prediction_head(image_feature)
self.assertAllEqual([64, 323, 1, 7, 7],
mask_predictions.get_shape().as_list())
 if __name__ == '__main__':
   tf.test.main()
syntax = "proto2";
package object_detection.protos;
// Message wrapper for various calibration configurations
message CalibrationConfig {
oneof calibrator {
// Class-agnostic calibration via linear interpolation (usually output from
// isotonic regression)
FunctionApproximation function_approximation = 1;
// Per-class calibration via linear interpolation
LabelFunctionApproximations label_function_approximations = 2;
// Class-agnostic sigmoid calibration
SigmoidCalibration sigmoid_calibration = 3;
// Per-class sigmoid calibration
LabelSigmoidCalibrations label_sigmoid_calibrations = 4;
}
}
// Message for class-agnostic domain/range mapping for function
// approximations
message FunctionApproximation {
// Message mapping class labels to indices
optional XYPairs x_y_pairs = 1;
}
// Message for class-specific domain/range mapping for function
// approximations
message LabelFunctionApproximations {
// Message mapping class labels to indices
map<string, XYPairs> label_xy_pairs_map = 1;
// Label map to map label names from to class ids.
optional string label_map_path = 2;
}
// Message for class-agnostic Sigmoid Calibration
message SigmoidCalibration {
// Message mapping class index to Sigmoid Parameters
optional SigmoidParameters sigmoid_parameters = 1;
}
// Message for class-specific Sigmoid Calibration
message LabelSigmoidCalibrations {
// Message mapping class index to Sigmoid Parameters
map<string, SigmoidParameters> label_sigmoid_parameters_map = 1;
// Label map to map label names from to class ids.
optional string label_map_path = 2;
}
// Message to store a domain/range pair for function to be approximated
message XYPairs {
message XYPair {
optional float x = 1;
optional float y = 2;
}
// Sequence of x/y pairs for function approximation
repeated XYPair x_y_pair = 1;
}
// Message defining parameters for sigmoid calibration.
message SigmoidParameters {
optional float a = 1 [default = -1.0];
optional float b = 2 [default = 0.0];
}
@@ -8,6 +8,7 @@ message ImageResizer {
   oneof image_resizer_oneof {
     KeepAspectRatioResizer keep_aspect_ratio_resizer = 1;
     FixedShapeResizer fixed_shape_resizer = 2;
+    IdentityResizer identity_resizer = 3;
   }
 }
@@ -19,6 +20,9 @@ enum ResizeType {
   AREA = 3; // Corresponds to tf.image.ResizeMethod.AREA
 }

+message IdentityResizer {
+}
+
 // Configuration proto for image resizer that keeps aspect ratio.
 message KeepAspectRatioResizer {
   // Desired size of the smaller image dimension in pixels.
......
@@ -22,7 +22,7 @@ enum InstanceMaskType {
   PNG_MASKS = 2; // Encoded PNG masks.
 }

-// Next id: 24
+// Next id: 25
 message InputReader {
   // Name of input reader. Typically used to describe the dataset that is read
   // by this input reader.
@@ -94,6 +94,9 @@ message InputReader {
   // otherwise some groundtruth boxes may be clipped.
   optional int32 max_number_of_boxes = 21 [default=100];

+  // Whether to load multiclass scores from the dataset.
+  optional bool load_multiclass_scores = 24 [default = false];
+
   // Whether to load groundtruth instance masks.
   optional bool load_instance_masks = 7 [default = false];
......
@@ -38,6 +38,7 @@ message AdamOptimizer {
   optional LearningRate learning_rate = 1;
 }

 // Configuration message for optimizer learning rate.
 message LearningRate {
   oneof learning_rate {
......
@@ -2,6 +2,8 @@ syntax = "proto2";

 package object_detection.protos;

+import "object_detection/protos/calibration.proto";
+
 // Configuration proto for non-max-suppression operation on a batch of
 // detections.
 message BatchNonMaxSuppression {
@@ -46,4 +48,7 @@ message PostProcessing {
   // Typically used for softmax distillation, though can be used to scale for
   // other reasons.
   optional float logit_scale = 3 [default = 1.0];
+
+  // Calibrate score outputs. Calibration is applied after score converter
+  // and before non max suppression.
+  optional CalibrationConfig calibration_config = 4;
 }
@@ -34,6 +34,8 @@ message PreprocessingStep {
     RandomRotation90 random_rotation90 = 26;
     RGBtoGray rgb_to_gray = 27;
     ConvertClassLogitsToSoftmax convert_class_logits_to_softmax = 28;
+    RandomAbsolutePadImage random_absolute_pad_image = 29;
+    RandomSelfConcatImage random_self_concat_image = 30;
   }
 }
@@ -179,6 +181,18 @@ message RandomPadImage {
   repeated float pad_color = 5;
 }

+// Randomly adds a padding of size [0, max_height_padding), [0, max_width_padding).
+message RandomAbsolutePadImage {
+  // Height will be padded uniformly at random from [0, max_height_padding).
+  optional int32 max_height_padding = 1;
+
+  // Width will be padded uniformly at random from [0, max_width_padding).
+  optional int32 max_width_padding = 2;
+
+  // Color of the padding. If unset, will pad using average color of the input
+  // image.
+  repeated float pad_color = 3;
+}
+
 // Randomly crops an image followed by a random pad.
 message RandomCropPadImage {
   // Cropping operation must cover at least one box by this fraction.
@@ -243,8 +257,8 @@ message RandomBlackPatches {

 // Randomly resizes the image up to [target_height, target_width].
 message RandomResizeMethod {
-  optional float target_height = 1;
-  optional float target_width = 2;
+  optional int32 target_height = 1;
+  optional int32 target_width = 2;
 }

 // Converts the RGB image to a grayscale image. This also converts the image
@@ -439,3 +453,11 @@ message ConvertClassLogitsToSoftmax {
   // Scale to use on logits before applying softmax.
   optional float temperature = 1 [default=1.0];
 }
+
+// Randomly concatenates the image with itself horizontally and/or vertically.
+message RandomSelfConcatImage {
+  // Probability of concatenating the image vertically.
+  optional float concat_vertical_probability = 1 [default = 0.1];
+
+  // Probability of concatenating the image horizontally.
+  optional float concat_horizontal_probability = 2 [default = 0.1];
+}
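
For illustration, the two new steps as they might appear in a training config
(the data_augmentation_options wrapper comes from train.proto; the values here
are made up):

data_augmentation_options {
  random_absolute_pad_image {
    max_height_padding: 50
    max_width_padding: 50
  }
}
data_augmentation_options {
  random_self_concat_image {
    concat_vertical_probability: 0.2
    concat_horizontal_probability: 0.2
  }
}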