Commit 05584085 authored by pkulzc, committed by Jonathan Huang

Merged commit includes the following changes (#6315):

236813471  by lzc:

    Internal change.

--
236507310  by lzc:

    Fix preprocess.random_resize_method config type issue: the target height and width are passed as "size" to tf.image.resize_images, which only accepts integers.
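
    A minimal sketch of the constraint (the 512x512 target size is
    illustrative): tf.image.resize_images converts its "size" argument to
    an int32 tensor, so float values from the old proto fields fail.

        import tensorflow as tf

        images = tf.placeholder(tf.float32, [None, None, None, 3])
        # "size" must be convertible to a 1-D int32 tensor; floats such
        # as [512.0, 512.0] (the old float proto fields) raise an error.
        resized = tf.image.resize_images(
            images, size=[512, 512],
            method=tf.image.ResizeMethod.BILINEAR)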

--
236409989  by Zhichao Lu:

    Config export_to_tpu from function parameter instead of HParams for TPU inference.

--
236403186  by Zhichao Lu:

    Make graph file names optional arguments.

--
236237072  by Zhichao Lu:

    Minor bugfix for keyword args.

--
236209602  by Zhichao Lu:

    Add support for PartitionedVariable to get_variables_available_in_checkpoint.
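
    A hedged usage sketch (the checkpoint path is hypothetical; the
    function lives in object_detection/utils/variables_helper.py):

        import tensorflow as tf
        from object_detection.utils import variables_helper

        # The variable list may now include tf.PartitionedVariable
        # instances, e.g. variables created with a partitioner.
        all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        available = variables_helper.get_variables_available_in_checkpoint(
            all_vars, '/path/to/model.ckpt')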

--
235828658  by Zhichao Lu:

    Automatically stop evaluation jobs when training is finished.

--
235817964  by Zhichao Lu:

    Add an optional process_metrics_fn callback to eval_util; it is called
    with the evaluation results once each evaluation is complete.
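
    The callback's exact signature is not shown in this diff; a plausible
    sketch, assuming it receives a dict mapping metric names to scalars:

        import tensorflow as tf

        def log_eval_metrics(metrics):
          # Hypothetical process_metrics_fn: log each metric once an
          # evaluation round completes.
          for name, value in metrics.items():
            tf.logging.info('eval metric %s = %f', name, value)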

--
235788721  by lzc:

    Fix yml file tf runtime...
parent a5db4420
@@ -29,7 +29,7 @@ from object_detection.utils import test_case
class SsdFeatureExtractorTestBase(test_case.TestCase):
def _build_conv_hyperparams(self):
def _build_conv_hyperparams(self, add_batch_norm=True):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
@@ -41,10 +41,14 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
truncated_normal_initializer {
}
}
batch_norm {
scale: false
}
"""
if add_batch_norm:
batch_norm_proto = """
batch_norm {
scale: false
}
"""
conv_hyperparams_text_proto += batch_norm_proto
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
@@ -13,41 +13,69 @@
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobilenet_v1_feature_extractor."""
"""Tests for SSD Mobilenet V1 feature extractors.
By using the parameterized test decorator, this test serves both the
Slim-based and the Keras-based Mobilenet V1 feature extractors in SSD.
"""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v1_feature_extractor
from object_detection.models import ssd_mobilenet_v1_keras_feature_extractor
slim = tf.contrib.slim
@parameterized.parameters(
{'use_keras': False},
{'use_keras': True},
)
class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, use_explicit_padding=False):
use_explicit_padding=False, is_training=False,
use_keras=False):
"""Constructs a new feature extractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
is_training: whether the network is in training mode.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
is_training: whether the network is in training mode.
use_keras: if True builds a keras-based feature extractor, if False builds
a slim-based one.
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self):
if use_keras:
return (ssd_mobilenet_v1_keras_feature_extractor.
SSDMobileNetV1KerasFeatureExtractor(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=self._build_conv_hyperparams(
add_batch_norm=False),
freeze_batchnorm=False,
inplace_batchnorm_update=False,
use_explicit_padding=use_explicit_padding,
name='MobilenetV1'))
else:
return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self, use_keras):
image_height = 128
image_width = 128
depth_multiplier = 1.0
@@ -57,12 +85,14 @@ class SsdMobilenetV1FeatureExtractorTest(
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False)
expected_feature_map_shape, use_explicit_padding=False,
use_keras=use_keras)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True)
expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_returns_correct_shapes_299(self):
def test_extract_features_returns_correct_shapes_299(self, use_keras):
image_height = 299
image_width = 299
depth_multiplier = 1.0
@@ -72,12 +102,14 @@ class SsdMobilenetV1FeatureExtractorTest(
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False)
expected_feature_map_shape, use_explicit_padding=False,
use_keras=use_keras)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True)
expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_with_dynamic_image_shape(self):
def test_extract_features_with_dynamic_image_shape(self, use_keras):
image_height = 128
image_width = 128
depth_multiplier = 1.0
@@ -87,12 +119,15 @@ class SsdMobilenetV1FeatureExtractorTest(
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False)
expected_feature_map_shape, use_explicit_padding=False,
use_keras=use_keras)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True)
expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
def test_extract_features_returns_correct_shapes_enforcing_min_depth(
self, use_keras):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
@@ -102,12 +137,15 @@ class SsdMobilenetV1FeatureExtractorTest(
(2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False)
expected_feature_map_shape, use_explicit_padding=False,
use_keras=use_keras)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True)
expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
self, use_keras):
image_height = 299
image_width = 299
depth_multiplier = 1.0
@@ -117,48 +155,63 @@ class SsdMobilenetV1FeatureExtractorTest(
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False)
expected_feature_map_shape, use_explicit_padding=False,
use_keras=use_keras)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True)
expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_raises_error_with_invalid_image_size(self):
def test_extract_features_raises_error_with_invalid_image_size(
self, use_keras):
image_height = 32
image_width = 32
depth_multiplier = 1.0
pad_to_multiple = 1
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier, pad_to_multiple)
image_height, image_width, depth_multiplier, pad_to_multiple,
use_keras=use_keras)
def test_preprocess_returns_correct_value_range(self):
def test_preprocess_returns_correct_value_range(self, use_keras):
image_height = 128
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(2, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
pad_to_multiple,
use_keras=use_keras)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
def test_variables_only_created_in_scope(self):
def test_variables_only_created_in_scope(self, use_keras):
depth_multiplier = 1
pad_to_multiple = 1
scope_name = 'MobilenetV1'
self.check_feature_extractor_variables_under_scope(
depth_multiplier, pad_to_multiple, scope_name)
depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras)
def test_has_fused_batchnorm(self):
def test_variable_count(self, use_keras):
depth_multiplier = 1
pad_to_multiple = 1
variables = self.get_feature_extractor_variables(
depth_multiplier, pad_to_multiple, use_keras=use_keras)
self.assertEqual(len(variables), 151)
def test_has_fused_batchnorm(self, use_keras):
image_height = 40
image_width = 40
depth_multiplier = 1
pad_to_multiple = 1
image_placeholder = tf.placeholder(tf.float32,
[1, image_height, image_width, 3])
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
feature_extractor = self._create_feature_extractor(
depth_multiplier, pad_to_multiple, use_keras=use_keras)
preprocessed_image = feature_extractor.preprocess(image_placeholder)
_ = feature_extractor.extract_features(preprocessed_image)
if use_keras:
_ = feature_extractor(preprocessed_image)
else:
_ = feature_extractor.extract_features(preprocessed_image)
self.assertTrue(any(op.type == 'FusedBatchNorm'
for op in tf.get_default_graph().get_operations()))
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for Keras MobilenetV1 features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.models.keras_models import mobilenet_v1
from object_detection.utils import ops
from object_detection.utils import shape_utils
slim = tf.contrib.slim
class SSDMobileNetV1KerasFeatureExtractor(
ssd_meta_arch.SSDKerasFeatureExtractor):
"""SSD Feature Extractor using Keras MobilenetV1 features."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
use_explicit_padding=False,
use_depthwise=False,
override_base_feature_extractor_hyperparams=False,
name=None):
"""Keras MobileNetV1 Feature Extractor for SSD Models.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false, the train op must add a control
dependency on the tf.GraphKeys.UPDATE_OPS collection in order to update
batch norm statistics.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
super(SSDMobileNetV1KerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
self._feature_map_layout = {
'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
'', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
}
self._mobilenet_v1 = None
self._feature_map_generator = None
def build(self, input_shape):
full_mobilenet_v1 = mobilenet_v1.mobilenet_v1(
batchnorm_training=(self._is_training and not self._freeze_batchnorm),
conv_hyperparams=(self._conv_hyperparams
if self._override_base_feature_extractor_hyperparams
else None),
weights=None,
use_explicit_padding=self._use_explicit_padding,
alpha=self._depth_multiplier,
min_depth=self._min_depth,
include_top=False)
conv2d_11_pointwise = full_mobilenet_v1.get_layer(
name='conv_pw_11_relu').output
conv2d_13_pointwise = full_mobilenet_v1.get_layer(
name='conv_pw_13_relu').output
self._mobilenet_v1 = tf.keras.Model(
inputs=full_mobilenet_v1.inputs,
outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
self._feature_map_generator = (
feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=self._feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
is_training=self._is_training,
conv_hyperparams=self._conv_hyperparams,
freeze_batchnorm=self._freeze_batchnorm,
name='FeatureMaps'))
self.built = True
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs = shape_utils.check_min_image_dim(
33, preprocessed_inputs)
image_features = self._mobilenet_v1(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
feature_maps = self._feature_map_generator({
'Conv2d_11_pointwise': image_features[0],
'Conv2d_13_pointwise': image_features[1]})
return feature_maps.values()
@@ -19,7 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.models.keras_applications import mobilenet_v2
from object_detection.models.keras_models import mobilenet_v2
from object_detection.utils import ops
from object_detection.utils import shape_utils
@@ -53,8 +53,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -96,9 +95,6 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams)
if self._depth_multiplier != 1.0:
raise ValueError('Only depth 1.0 is supported, found: {}'.
format(self._depth_multiplier))
if self._use_explicit_padding is True:
raise ValueError('Explicit padding is not a valid option.')
self._resnet_base_fn = resnet_base_fn
@@ -150,13 +146,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
Raises:
ValueError: depth multiplier is not supported.
"""
if self._depth_multiplier != 1.0:
raise ValueError('Depth multiplier not supported.')
preprocessed_inputs = shape_utils.check_min_image_dim(
129, preprocessed_inputs)
@@ -174,8 +164,11 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
global_pool=False,
output_stride=None,
store_non_strided_activations=True,
min_base_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
image_features = self._filter_features(image_features)
depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
with slim.arg_scope(self._conv_hyperparams_fn()):
with tf.variable_scope(self._fpn_scope_name,
reuse=self._reuse_weights):
@@ -185,7 +178,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_block_list.append('block{}'.format(level - 1))
fpn_features = feature_map_generators.fpn_top_down_feature_maps(
[(key, image_features[key]) for key in feature_block_list],
depth=self._additional_layer_depth)
depth=depth_fn(self._additional_layer_depth))
feature_maps = []
for level in range(self._fpn_min_level, base_fpn_max_level + 1):
feature_maps.append(
@@ -196,7 +189,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
for i in range(base_fpn_max_level, self._fpn_max_level):
last_feature_map = slim.conv2d(
last_feature_map,
num_outputs=self._additional_layer_depth,
num_outputs=depth_fn(self._additional_layer_depth),
kernel_size=[3, 3],
stride=2,
padding='SAME',
@@ -226,8 +219,7 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -284,8 +276,7 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -342,8 +333,7 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -25,8 +25,7 @@ class SSDResnet50V1FeatureExtractorTest(
"""SSDResnet50v1Fpn feature extractor test."""
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
min_depth = 32
use_explicit_padding=False, min_depth=32):
is_training = True
return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
@@ -42,8 +41,7 @@ class SSDResnet101V1FeatureExtractorTest(
"""SSDResnet101v1Fpn feature extractor test."""
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
min_depth = 32
use_explicit_padding=False, min_depth=32):
is_training = True
return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor(
@@ -64,8 +62,7 @@ class SSDResnet152V1FeatureExtractorTest(
"""SSDResnet152v1Fpn feature extractor test."""
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
min_depth = 32
use_explicit_padding=False, min_depth=32):
is_training = True
return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor(
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for ssd resnet v1 FPN feature extractors."""
import abc
import itertools
import numpy as np
import tensorflow as tf
@@ -32,6 +33,14 @@ class SSDResnetFPNFeatureExtractorTestBase(
def _fpn_scope_name(self):
return 'fpn'
@abc.abstractmethod
def _create_feature_extractor(self,
depth_multiplier,
pad_to_multiple,
use_explicit_padding=False,
min_depth=32):
pass
def test_extract_features_returns_correct_shapes_256(self):
image_height = 256
image_width = 256
@@ -56,6 +65,45 @@ class SSDResnetFPNFeatureExtractorTestBase(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_depth_multiplier(self):
image_height = 256
image_width = 256
depth_multiplier = 0.5
expected_num_channels = int(256 * depth_multiplier)
pad_to_multiple = 1
expected_feature_map_shape = [(2, 32, 32, expected_num_channels),
(2, 16, 16, expected_num_channels),
(2, 8, 8, expected_num_channels),
(2, 4, 4, expected_num_channels),
(2, 2, 2, expected_num_channels)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_min_depth(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
min_depth = 320
expected_feature_map_shape = [(2, 32, 32, min_depth),
(2, 16, 16, min_depth),
(2, 8, 8, min_depth),
(2, 4, 4, min_depth),
(2, 2, 2, min_depth)]
def graph_fn(image_tensor):
feature_extractor = self._create_feature_extractor(
depth_multiplier, pad_to_multiple, min_depth=min_depth)
return feature_extractor.extract_features(image_tensor)
image_tensor = np.random.rand(2, image_height, image_width,
3).astype(np.float32)
feature_maps = self.execute(graph_fn, [image_tensor])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shape):
self.assertAllEqual(feature_map.shape, expected_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
image_height = 254
image_width = 254
@@ -54,8 +54,8 @@
"sys.path.append(\"..\")\n",
"from object_detection.utils import ops as utils_ops\n",
"\n",
"if StrictVersion(tf.__version__) \u003c StrictVersion('1.9.0'):\n",
" raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')\n"
"if StrictVersion(tf.__version__) \u003c StrictVersion('1.12.0'):\n",
" raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')\n"
]
},
{
@@ -108,14 +108,16 @@ class ConvolutionalBoxPredictor(box_predictor.BoxPredictor):
feature map.
Returns:
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
A dictionary containing:
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
(optional) Predictions from other heads.
"""
predictions = {
BOX_ENCODINGS: [],
@@ -226,8 +228,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
kernel_size: Size of final convolution kernel.
apply_batch_norm: Whether to apply batch normalization to conv layers in
this predictor.
share_prediction_tower: Whether to share the multi-layer tower between box
prediction and class prediction heads.
share_prediction_tower: Whether to share the multi-layer tower among box
prediction head, class prediction head and other heads.
use_depthwise: Whether to use depthwise separable conv2d instead of
regular conv2d.
"""
@@ -270,9 +272,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
inserted_layer_counter += 1
return image_feature, inserted_layer_counter
def _compute_base_tower(self, tower_name_scope, image_feature, feature_index,
has_different_feature_channels, target_channel,
inserted_layer_counter):
def _compute_base_tower(self, tower_name_scope, image_feature, feature_index):
net = image_feature
for i in range(self._num_layers_before_predictor):
if self._use_depthwise:
@@ -296,23 +296,18 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
return net
def _predict_head(self, head_name, head_obj, image_feature, box_tower_feature,
feature_index, has_different_feature_channels,
target_channel, inserted_layer_counter,
num_predictions_per_location):
feature_index, num_predictions_per_location):
if head_name == CLASS_PREDICTIONS_WITH_BACKGROUND:
tower_name_scope = 'ClassPredictionTower'
else:
raise ValueError('Unknown head')
tower_name_scope = head_name + 'PredictionTower'
if self._share_prediction_tower:
head_tower_feature = box_tower_feature
else:
head_tower_feature = self._compute_base_tower(
tower_name_scope=tower_name_scope,
image_feature=image_feature,
feature_index=feature_index,
has_different_feature_channels=has_different_feature_channels,
target_channel=target_channel,
inserted_layer_counter=inserted_layer_counter)
feature_index=feature_index)
return head_obj.predict(
features=head_tower_feature,
num_predictions_per_location=num_predictions_per_location)
@@ -341,13 +336,13 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
(optional) mask_predictions: A list of float tensors of shape
(optional) Predictions from other heads.
E.g., mask_predictions: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes, mask_height, mask_width].
Raises:
ValueError: If the image feature maps do not have the same number of
channels or if the num predictions per locations is differs between the
ValueError: If the number of predictions per location differs between the
feature maps.
"""
if len(set(num_predictions_per_location_list)) > 1:
@@ -392,10 +387,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
box_tower_feature = self._compute_base_tower(
tower_name_scope=box_tower_scope,
image_feature=image_feature,
feature_index=feature_index,
has_different_feature_channels=has_different_feature_channels,
target_channel=target_channel,
inserted_layer_counter=inserted_layer_counter)
feature_index=feature_index)
box_encodings = self._box_prediction_head.predict(
features=box_tower_feature,
num_predictions_per_location=num_predictions_per_location)
@@ -413,9 +405,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
image_feature=image_feature,
box_tower_feature=box_tower_feature,
feature_index=feature_index,
has_different_feature_channels=has_different_feature_channels,
target_channel=target_channel,
inserted_layer_counter=inserted_layer_counter,
num_predictions_per_location=num_predictions_per_location)
predictions[head_name].append(prediction)
return predictions
@@ -14,6 +14,8 @@
# ==============================================================================
"""Tests for object_detection.predictors.convolutional_box_predictor."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
@@ -21,6 +23,9 @@ from google.protobuf import text_format
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.predictors import convolutional_box_predictor as box_predictor
from object_detection.predictors.heads import box_head
from object_detection.predictors.heads import class_head
from object_detection.predictors.heads import mask_head
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
@@ -852,5 +857,66 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
def test_other_heads_predictions(self):
box_code_size = 4
num_classes_without_background = 3
other_head_name = 'Mask'
mask_height = 5
mask_width = 5
num_predictions_per_location = 5
def graph_fn(image_features):
box_prediction_head = box_head.WeightSharedConvolutionalBoxHead(
box_code_size)
class_prediction_head = class_head.WeightSharedConvolutionalClassHead(
num_classes_without_background + 1)
other_heads = {
other_head_name:
mask_head.WeightSharedConvolutionalMaskHead(
num_classes_without_background,
mask_height=mask_height,
mask_width=mask_width)
}
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
box_prediction_head=box_prediction_head,
class_prediction_head=class_prediction_head,
other_heads=other_heads,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=2)
box_predictions = conv_box_predictor.predict(
[image_features],
num_predictions_per_location=[num_predictions_per_location],
scope='BoxPredictor')
for key, value in box_predictions.items():
box_predictions[key] = tf.concat(value, axis=1)
assert len(box_predictions) == 3
return (box_predictions[box_predictor.BOX_ENCODINGS],
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
box_predictions[other_head_name])
batch_size = 4
feature_ht = 8
feature_wt = 8
image_features = np.random.rand(batch_size, feature_ht, feature_wt,
64).astype(np.float32)
(box_encodings, class_predictions, other_head_predictions) = self.execute(
graph_fn, [image_features])
num_anchors = feature_ht * feature_wt * num_predictions_per_location
self.assertAllEqual(box_encodings.shape,
[batch_size, num_anchors, box_code_size])
self.assertAllEqual(
class_predictions.shape,
[batch_size, num_anchors, num_classes_without_background + 1])
self.assertAllEqual(other_head_predictions.shape, [
batch_size, num_anchors, num_classes_without_background, mask_height,
mask_width
])
if __name__ == '__main__':
tf.test.main()
@@ -191,7 +191,69 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
self.assertEqual(conv_box_predictor._sorted_head_names,
['box_encodings', 'class_predictions_with_background'])
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_use_depthwise_convolution(self):
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
conv_box_predictor = (
box_predictor_builder.build_convolutional_keras_box_predictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_conv_hyperparams(),
freeze_batchnorm=False,
inplace_batchnorm_update=False,
num_predictions_per_location_list=[5],
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4,
use_depthwise=True
))
box_predictions = conv_box_predictor([image_features])
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
objectness_predictions = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
init_op = tf.global_variables_initializer()
resolution = 32
expected_num_anchors = resolution*resolution*5
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
objectness_predictions_shape) = sess.run(
[tf.shape(box_encodings), tf.shape(objectness_predictions)],
feed_dict={image_features:
np.random.rand(4, resolution, resolution, 64)})
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
expected_variable_set = set([
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias',
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/kernel',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor_depthwise/'
'bias',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor_depthwise/'
'depthwise_kernel',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/bias',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/kernel',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor_depthwise/bias',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor_depthwise/'
'depthwise_kernel',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/bias',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/kernel'])
self.assertEqual(expected_variable_set, actual_variable_set)
self.assertEqual(conv_box_predictor._sorted_head_names,
['box_encodings', 'class_predictions_with_background'])
if __name__ == '__main__':
tf.test.main()
@@ -56,7 +56,20 @@ class ConvolutionalKerasBoxHeadTest(test_case.TestCase):
box_encodings = box_prediction_head(image_feature)
self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
box_prediction_head = keras_box_head.ConvolutionalBoxHead(
is_training=True,
box_code_size=4,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
box_encodings = box_prediction_head(image_feature)
self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())
if __name__ == '__main__':
tf.test.main()
@@ -59,7 +59,23 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
self.assertAllEqual([64, 323, 20],
class_predictions.get_shape().as_list())
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
class_prediction_head = keras_class_head.ConvolutionalClassHead(
is_training=True,
num_class_slots=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
class_predictions = class_prediction_head(image_feature)
self.assertAllEqual([64, 323, 20],
class_predictions.get_shape().as_list())
if __name__ == '__main__':
tf.test.main()
@@ -61,7 +61,25 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
self.assertAllEqual([64, 323, 20, 7, 7],
mask_predictions.get_shape().as_list())
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_prediction_size_use_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
mask_prediction_head = keras_mask_head.ConvolutionalMaskHead(
is_training=True,
num_classes=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True,
mask_height=7,
mask_width=7)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
mask_predictions = mask_prediction_head(image_feature)
self.assertAllEqual([64, 323, 20, 7, 7],
mask_predictions.get_shape().as_list())
def test_class_agnostic_prediction_size_use_depthwise_false(self):
conv_hyperparams = self._build_conv_hyperparams()
@@ -84,7 +102,26 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
self.assertAllEqual([64, 323, 1, 7, 7],
mask_predictions.get_shape().as_list())
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_class_agnostic_prediction_size_use_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
mask_prediction_head = keras_mask_head.ConvolutionalMaskHead(
is_training=True,
num_classes=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True,
mask_height=7,
mask_width=7,
masks_are_class_agnostic=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
mask_predictions = mask_prediction_head(image_feature)
self.assertAllEqual([64, 323, 1, 7, 7],
mask_predictions.get_shape().as_list())
if __name__ == '__main__':
tf.test.main()
syntax = "proto2";
package object_detection.protos;
// Message wrapper for various calibration configurations
message CalibrationConfig {
oneof calibrator {
// Class-agnostic calibration via linear interpolation (usually output from
// isotonic regression)
FunctionApproximation function_approximation = 1;
// Per-class calibration via linear interpolation
LabelFunctionApproximations label_function_approximations = 2;
// Class-agnostic sigmoid calibration
SigmoidCalibration sigmoid_calibration = 3;
// Per-class sigmoid calibration
LabelSigmoidCalibrations label_sigmoid_calibrations = 4;
}
}
// Message for class-agnostic domain/range mapping for function
// approximations
message FunctionApproximation {
// Message mapping class labels to indices
optional XYPairs x_y_pairs = 1;
}
// Message for class-specific domain/range mapping for function
// approximations
message LabelFunctionApproximations {
// Message mapping class labels to indices
map<string, XYPairs> label_xy_pairs_map = 1;
// Label map used to map label names to class ids.
optional string label_map_path = 2;
}
// Message for class-agnostic Sigmoid Calibration
message SigmoidCalibration {
// Message mapping class index to Sigmoid Parameters
optional SigmoidParameters sigmoid_parameters = 1;
}
// Message for class-specific Sigmoid Calibration
message LabelSigmoidCalibrations {
// Message mapping class index to Sigmoid Parameters
map<string, SigmoidParameters> label_sigmoid_parameters_map = 1;
// Label map to map label names from to class ids.
optional string label_map_path = 2;
}
// Message to store a domain/range pair for function to be approximated
message XYPairs {
message XYPair {
optional float x = 1;
optional float y = 2;
}
// Sequence of x/y pairs for function approximation
repeated XYPair x_y_pair = 1;
}
// Message defining parameters for sigmoid calibration.
message SigmoidParameters {
optional float a = 1 [default = -1.0];
optional float b = 2 [default = 0.0];
}
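
For illustration only (field names follow the messages above; the parameter
values are placeholders), a class-agnostic sigmoid calibration config can be
parsed with the generated Python bindings:

    from google.protobuf import text_format
    from object_detection.protos import calibration_pb2

    config = calibration_pb2.CalibrationConfig()
    # Select the class-agnostic sigmoid calibrator from the oneof.
    text_format.Merge("""
      sigmoid_calibration {
        sigmoid_parameters {
          a: -1.0
          b: 0.0
        }
      }
    """, config)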
@@ -8,6 +8,7 @@ message ImageResizer {
oneof image_resizer_oneof {
KeepAspectRatioResizer keep_aspect_ratio_resizer = 1;
FixedShapeResizer fixed_shape_resizer = 2;
IdentityResizer identity_resizer = 3;
}
}
@@ -19,6 +20,9 @@ enum ResizeType {
AREA = 3; // Corresponds to tf.image.ResizeMethod.AREA
}
message IdentityResizer {
}
// Configuration proto for image resizer that keeps aspect ratio.
message KeepAspectRatioResizer {
// Desired size of the smaller image dimension in pixels.
@@ -22,7 +22,7 @@ enum InstanceMaskType {
PNG_MASKS = 2; // Encoded PNG masks.
}
// Next id: 24
// Next id: 25
message InputReader {
// Name of input reader. Typically used to describe the dataset that is read
// by this input reader.
@@ -94,6 +94,9 @@ message InputReader {
// otherwise some groundtruth boxes may be clipped.
optional int32 max_number_of_boxes = 21 [default=100];
// Whether to load multiclass scores from the dataset.
optional bool load_multiclass_scores = 24 [default = false];
// Whether to load groundtruth instance masks.
optional bool load_instance_masks = 7 [default = false];
@@ -38,6 +38,7 @@ message AdamOptimizer {
optional LearningRate learning_rate = 1;
}
// Configuration message for optimizer learning rate.
message LearningRate {
oneof learning_rate {
@@ -2,6 +2,8 @@ syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/calibration.proto";
// Configuration proto for non-max-suppression operation on a batch of
// detections.
message BatchNonMaxSuppression {
@@ -46,4 +48,7 @@ message PostProcessing {
// Typically used for softmax distillation, though can be used to scale for
// other reasons.
optional float logit_scale = 3 [default = 1.0];
// Calibrate score outputs. Calibration is applied after score converter
// and before non max suppression.
optional CalibrationConfig calibration_config = 4;
}
@@ -34,6 +34,8 @@ message PreprocessingStep {
RandomRotation90 random_rotation90 = 26;
RGBtoGray rgb_to_gray = 27;
ConvertClassLogitsToSoftmax convert_class_logits_to_softmax = 28;
RandomAbsolutePadImage random_absolute_pad_image = 29;
RandomSelfConcatImage random_self_concat_image = 30;
}
}
@@ -179,6 +181,18 @@ message RandomPadImage {
repeated float pad_color = 5;
}
// Randomly adds a padding of size [0, max_height_padding), [0, max_width_padding).
message RandomAbsolutePadImage {
// Height will be padded uniformly at random from [0, max_height_padding).
optional int32 max_height_padding = 1;
// Width will be padded uniformly at random from [0, max_width_padding).
optional int32 max_width_padding = 2;
// Color of the padding. If unset, will pad using average color of the input
// image.
repeated float pad_color = 3;
}
// Randomly crops an image followed by a random pad.
message RandomCropPadImage {
// Cropping operation must cover at least one box by this fraction.
@@ -243,8 +257,8 @@ message RandomBlackPatches {
// Randomly resizes the image up to [target_height, target_width].
message RandomResizeMethod {
optional float target_height = 1;
optional float target_width = 2;
optional int32 target_height = 1;
optional int32 target_width = 2;
}
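
This ties back to change 236507310 above: with int32 fields, a config like
the following (sizes illustrative) parses cleanly and yields integer values
for the "size" argument of tf.image.resize_images:

    from google.protobuf import text_format
    from object_detection.protos import preprocessor_pb2

    step = preprocessor_pb2.PreprocessingStep()
    text_format.Merge("""
      random_resize_method {
        target_height: 512
        target_width: 512
      }
    """, step)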
// Converts the RGB image to a grayscale image. This also converts the image
@@ -439,3 +453,11 @@ message ConvertClassLogitsToSoftmax {
// Scale to use on logits before applying softmax.
optional float temperature = 1 [default=1.0];
}
// Randomly concatenates the image with itself horizontally and/or vertically.
message RandomSelfConcatImage {
// Probability of concatenating the image vertically.
optional float concat_vertical_probability = 1 [default = 0.1];
// Probability of concatenating the image horizontally.
optional float concat_horizontal_probability = 2 [default = 0.1];
}