Commit 30aeec75 authored by Toby Boyd, committed by GitHub

Merge pull request #2 from tensorflow/master

Sync to tensorflow-master
parents 68a18b70 78007443
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.models.faster_rcnn_resnet_v1_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as faster_rcnn_resnet_v1
class FasterRcnnResnetV1FeatureExtractorTest(tf.test.TestCase):
def _build_feature_extractor(self,
first_stage_features_stride,
architecture='resnet_v1_101'):
feature_extractor_map = {
'resnet_v1_50':
faster_rcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
'resnet_v1_101':
faster_rcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
'resnet_v1_152':
faster_rcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor
}
return feature_extractor_map[architecture](
is_training=False,
first_stage_features_stride=first_stage_features_stride,
reuse_weights=None,
weight_decay=0.0)
def test_extract_proposal_features_returns_expected_size(self):
for architecture in ['resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152']:
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16, architecture=architecture)
preprocessed_inputs = tf.random_uniform(
[4, 224, 224, 3], maxval=255, dtype=tf.float32)
rpn_feature_map = feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
features_shape = tf.shape(rpn_feature_map)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [4, 14, 14, 1024])
def test_extract_proposal_features_stride_eight(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=8)
preprocessed_inputs = tf.random_uniform(
[4, 224, 224, 3], maxval=255, dtype=tf.float32)
rpn_feature_map = feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
features_shape = tf.shape(rpn_feature_map)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [4, 28, 28, 1024])
def test_extract_proposal_features_half_size_input(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
preprocessed_inputs = tf.random_uniform(
[1, 112, 112, 3], maxval=255, dtype=tf.float32)
rpn_feature_map = feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
features_shape = tf.shape(rpn_feature_map)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [1, 7, 7, 1024])
def test_extract_proposal_features_dies_on_invalid_stride(self):
with self.assertRaises(ValueError):
self._build_feature_extractor(first_stage_features_stride=99)
def test_extract_proposal_features_dies_on_very_small_images(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
rpn_feature_map = feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
features_shape = tf.shape(rpn_feature_map)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
with self.assertRaises(tf.errors.InvalidArgumentError):
sess.run(
features_shape,
feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)})
def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
preprocessed_inputs = tf.random_uniform(
[224, 224, 3], maxval=255, dtype=tf.float32)
with self.assertRaises(ValueError):
feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
def test_extract_box_classifier_features_returns_expected_size(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
proposal_feature_maps = tf.random_uniform(
[3, 7, 7, 1024], maxval=255, dtype=tf.float32)
proposal_classifier_features = (
feature_extractor.extract_box_classifier_features(
proposal_feature_maps, scope='TestScope'))
features_shape = tf.shape(proposal_classifier_features)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [3, 7, 7, 2048])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to generate a list of feature maps based on image features.
Provides several feature map generators that can be used to build object
detection feature extractors.
Object detection feature extractors are usually built by stacking two
components: a base feature extractor, such as Inception V3, and a feature map
generator. Feature map generators build on the base feature extractor and
produce a list of final feature maps.
"""
import collections
import tensorflow as tf
from object_detection.utils import ops
slim = tf.contrib.slim
def get_depth_fn(depth_multiplier, min_depth):
"""Builds a callable to compute depth (output channels) of conv filters.
Args:
depth_multiplier: a multiplier for the nominal depth.
min_depth: a lower bound on the depth of filters.
Returns:
A callable that takes in a nominal depth and returns the depth to use.
"""
def multiply_depth(depth):
new_depth = int(depth * depth_multiplier)
return max(new_depth, min_depth)
return multiply_depth
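# Example (illustrative note, not in the original module): get_depth_fn(0.5, 16)
# returns a callable that maps a nominal depth of 64 to 32, while 16 stays at
# the min_depth floor of 16; the accompanying unit tests exercise these cases.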
def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
min_depth, insert_1x1_conv, image_features):
"""Generates multi resolution feature maps from input image features.
Generates multi-scale feature maps for detection as in the SSD paper by
Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf (see Sec. 2.1).
More specifically, it performs the following two tasks:
1) If a layer name is provided in the configuration, returns that layer as a
feature map.
2) If a layer name is left as an empty string, constructs a new feature map
based on the spatial shape and depth configuration. Note that the current
implementation only supports generating new layers using convolution of
stride 2 resulting in a spatial resolution reduction by a factor of 2.
An example of the configuration for Inception V3:
{
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128],
'anchor_strides': [16, 32, 64, -1, -1, -1]
}
Args:
feature_map_layout: Dictionary of specifications for the feature map
layouts in the following format (Inception V2/V3 respectively):
{
'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128],
'anchor_strides': [16, 32, 64, -1, -1, -1]
}
or
{
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128],
'anchor_strides': [16, 32, 64, -1, -1, -1]
}
If 'from_layer' is specified, the specified feature map is directly used
as a box predictor layer, and the layer_depth is directly inferred from the
feature map (instead of using the provided 'layer_depth' parameter). In
this case, our convention is to set 'layer_depth' to -1 for clarity.
Otherwise, if 'from_layer' is an empty string, then the box predictor
layer will be built from the previous layer using convolution operations.
Note that the current implementation only supports generating new layers
using convolutions of stride 2 (resulting in a spatial resolution
reduction by a factor of 2), and will be extended to a more flexible
design. Finally, the optional 'anchor_strides' can be used to specify the
anchor stride at each layer where 'from_layer' is specified. Our
convention is to set 'anchor_strides' to -1 at the positions where
'from_layer' is an empty string, and anchor strides at these layers will
be inferred from the previous layer's anchor strides and the current
layer's stride length. In the case where 'anchor_strides' is not
specified, the anchor strides will default to the image width and height
divided by the number of anchors.
depth_multiplier: Depth multiplier for convolutional layers.
min_depth: Minimum depth for convolutional layers.
insert_1x1_conv: A boolean indicating whether an additional 1x1 convolution
should be inserted before shrinking the feature map.
image_features: A dictionary of handles to activation tensors from the
base feature extractor.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
Raises:
ValueError: if the number of entries in 'from_layer' and
'layer_depth' do not match.
ValueError: if the generated layer does not have the same resolution
as specified.
"""
depth_fn = get_depth_fn(depth_multiplier, min_depth)
feature_map_keys = []
feature_maps = []
base_from_layer = ''
feature_map_strides = None
use_depthwise = False
if 'anchor_strides' in feature_map_layout:
feature_map_strides = (feature_map_layout['anchor_strides'])
if 'use_depthwise' in feature_map_layout:
use_depthwise = feature_map_layout['use_depthwise']
for index, (from_layer, layer_depth) in enumerate(
zip(feature_map_layout['from_layer'], feature_map_layout['layer_depth'])):
if from_layer:
feature_map = image_features[from_layer]
base_from_layer = from_layer
feature_map_keys.append(from_layer)
else:
pre_layer = feature_maps[-1]
intermediate_layer = pre_layer
if insert_1x1_conv:
layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
base_from_layer, index, depth_fn(layer_depth / 2))
intermediate_layer = slim.conv2d(
pre_layer,
depth_fn(layer_depth / 2), [1, 1],
padding='SAME',
stride=1,
scope=layer_name)
stride = 2
layer_name = '{}_2_Conv2d_{}_3x3_s2_{}'.format(
base_from_layer, index, depth_fn(layer_depth))
if use_depthwise:
feature_map = slim.separable_conv2d(
ops.pad_to_multiple(intermediate_layer, stride),
None, [3, 3],
depth_multiplier=1,
padding='SAME',
stride=stride,
scope=layer_name + '_depthwise')
feature_map = slim.conv2d(
feature_map,
depth_fn(layer_depth), [1, 1],
padding='SAME',
stride=1,
scope=layer_name)
else:
feature_map = slim.conv2d(
ops.pad_to_multiple(intermediate_layer, stride),
depth_fn(layer_depth), [3, 3],
padding='SAME',
stride=stride,
scope=layer_name)
if (index > 0 and feature_map_strides and
feature_map_strides[index - 1] > 0):
feature_map_strides[index] = (
stride * feature_map_strides[index - 1])
feature_map_keys.append(layer_name)
feature_maps.append(feature_map)
return collections.OrderedDict(
[(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
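# Illustrative usage sketch (not part of the original module). The layer names
# and tensor shapes below are hypothetical stand-ins for real base-network
# endpoints; with insert_1x1_conv=True the extra layer is built as a 1x1 conv
# followed by a stride-2 3x3 conv of the requested depth.
if __name__ == '__main__':
  _example_layout = {
      'from_layer': ['Mixed_4c', 'Mixed_5c', ''],
      'layer_depth': [-1, -1, 256],
  }
  _example_features = {
      'Mixed_4c': tf.random_uniform([1, 14, 14, 576], dtype=tf.float32),
      'Mixed_5c': tf.random_uniform([1, 7, 7, 1024], dtype=tf.float32),
  }
  _example_maps = multi_resolution_feature_maps(
      feature_map_layout=_example_layout,
      depth_multiplier=1.0,
      min_depth=16,
      insert_1x1_conv=True,
      image_features=_example_features)
  # Expected keys: ['Mixed_4c', 'Mixed_5c', 'Mixed_5c_2_Conv2d_2_3x3_s2_256'].
  print(list(_example_maps.keys()))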
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for feature map generators."""
import tensorflow as tf
from object_detection.models import feature_map_generators
INCEPTION_V2_LAYOUT = {
'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 256],
'anchor_strides': [16, 32, 64, -1, -1, -1],
'layer_target_norm': [20.0, -1, -1, -1, -1, -1],
}
INCEPTION_V3_LAYOUT = {
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128],
'anchor_strides': [16, 32, 64, -1, -1, -1],
'aspect_ratios': [1.0, 2.0, 1.0/2, 3.0, 1.0/3]
}
# TODO: add tests with different anchor strides.
class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes_with_inception_v2(self):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=INCEPTION_V2_LAYOUT,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
expected_feature_map_shapes = {
'Mixed_3c': (4, 28, 28, 256),
'Mixed_4c': (4, 14, 14, 576),
'Mixed_5c': (4, 7, 7, 1024),
'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_inception_v3(self):
image_features = {
'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=INCEPTION_V3_LAYOUT,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
expected_feature_map_shapes = {
'Mixed_5d': (4, 35, 35, 256),
'Mixed_6e': (4, 17, 17, 576),
'Mixed_7c': (4, 8, 8, 1024),
'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128)}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
class GetDepthFunctionTest(tf.test.TestCase):
def test_return_min_depth_when_multiplier_is_small(self):
depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5,
min_depth=16)
self.assertEqual(depth_fn(16), 16)
def test_return_correct_depth_with_multiplier(self):
depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5,
min_depth=16)
self.assertEqual(depth_fn(64), 32)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base test class SSDFeatureExtractors."""
from abc import abstractmethod
import numpy as np
import tensorflow as tf
class SsdFeatureExtractorTestBase(object):
def _validate_features_shape(self,
feature_extractor,
preprocessed_inputs,
expected_feature_map_shapes):
"""Checks the extracted features are of correct shape.
Args:
feature_extractor: The feature extractor to test.
preprocessed_inputs: A [batch, height, width, 3] tensor to extract
features with.
expected_feature_map_shapes: The expected shape of the extracted features.
"""
feature_maps = feature_extractor.extract_features(preprocessed_inputs)
feature_map_shapes = [tf.shape(feature_map) for feature_map in feature_maps]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
feature_map_shapes_out = sess.run(feature_map_shapes)
for shape_out, exp_shape_out in zip(
feature_map_shapes_out, expected_feature_map_shapes):
self.assertAllEqual(shape_out, exp_shape_out)
@abstractmethod
def _create_feature_extractor(self, depth_multiplier):
"""Constructs a new feature extractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
pass
def check_extract_features_returns_correct_shape(
self,
image_height,
image_width,
depth_multiplier,
expected_feature_map_shapes_out):
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_inputs = tf.random_uniform(
[4, image_height, image_width, 3], dtype=tf.float32)
self._validate_features_shape(
feature_extractor, preprocessed_inputs, expected_feature_map_shapes_out)
def check_extract_features_raises_error_with_invalid_image_size(
self,
image_height,
image_width,
depth_multiplier):
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
feature_maps = feature_extractor.extract_features(preprocessed_inputs)
test_preprocessed_image = np.random.rand(4, image_height, image_width, 3)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
with self.assertRaises(tf.errors.InvalidArgumentError):
sess.run(feature_maps,
feed_dict={preprocessed_inputs: test_preprocessed_image})
def check_feature_extractor_variables_under_scope(self,
depth_multiplier,
scope_name):
g = tf.Graph()
with g.as_default():
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
feature_extractor.extract_features(preprocessed_inputs)
variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
for variable in variables:
self.assertTrue(variable.name.startswith(scope_name))
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for InceptionV2 features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from nets import inception_v2
slim = tf.contrib.slim
class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using InceptionV2 features."""
def __init__(self,
depth_multiplier,
min_depth,
conv_hyperparams,
reuse_weights=None):
"""InceptionV2 Feature Extractor for SSD Models.
Args:
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
reuse_weights: Whether to reuse variables. Default is None.
"""
super(SSDInceptionV2FeatureExtractor, self).__init__(
depth_multiplier, min_depth, conv_hyperparams, reuse_weights)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.'])
feature_map_layout = {
'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
}
with tf.control_dependencies([shape_assert]):
with slim.arg_scope(self._conv_hyperparams):
with tf.variable_scope('InceptionV2',
reuse=self._reuse_weights) as scope:
_, image_features = inception_v2.inception_v2_base(
preprocessed_inputs,
final_endpoint='Mixed_5c',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.models.ssd_inception_v2_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_inception_v2_feature_extractor
class SsdInceptionV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase,
tf.test.TestCase):
def _create_feature_extractor(self, depth_multiplier):
"""Constructs a SsdInceptionV2FeatureExtractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
Returns:
an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor.
"""
min_depth = 32
conv_hyperparams = {}
return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
depth_multiplier, min_depth, conv_hyperparams)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 8, 8, 576), (4, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 19, 19, 576), (4, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
expected_feature_map_shape = [(4, 19, 19, 128), (4, 10, 10, 128),
(4, 5, 5, 32), (4, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self):
image_height = 32
image_width = 32
depth_multiplier = 1.0
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier)
def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
test_image = np.random.rand(4, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
scope_name = 'InceptionV2'
self.check_feature_extractor_variables_under_scope(depth_multiplier,
scope_name)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for MobilenetV1 features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from nets import mobilenet_v1
slim = tf.contrib.slim
class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using MobilenetV1 features."""
def __init__(self,
depth_multiplier,
min_depth,
conv_hyperparams,
reuse_weights=None):
"""MobileNetV1 Feature Extractor for SSD Models.
Args:
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
reuse_weights: Whether to reuse variables. Default is None.
"""
super(SSDMobileNetV1FeatureExtractor, self).__init__(
depth_multiplier, min_depth, conv_hyperparams, reuse_weights)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.'])
feature_map_layout = {
'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
'', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
}
with tf.control_dependencies([shape_assert]):
with slim.arg_scope(self._conv_hyperparams):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base(
preprocessed_inputs,
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobilenet_v1_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v1_feature_extractor
class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
def _create_feature_extractor(self, depth_multiplier):
"""Constructs a new feature extractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
conv_hyperparams = {}
return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
depth_multiplier, min_depth, conv_hyperparams)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 8, 8, 512), (4, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 19, 19, 512), (4, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
expected_feature_map_shape = [(4, 19, 19, 32), (4, 10, 10, 32),
(4, 5, 5, 32), (4, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self):
image_height = 32
image_width = 32
depth_multiplier = 1.0
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier)
def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
test_image = np.random.rand(4, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
scope_name = 'MobilenetV1'
self.check_feature_extractor_variables_under_scope(depth_multiplier,
scope_name)
if __name__ == '__main__':
tf.test.main()
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Object Detection Demo\n",
"Welcome to the object detection inference walkthrough! This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/installation.md) before you start."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import os\n",
"import six.moves.urllib as urllib\n",
"import sys\n",
"import tarfile\n",
"import tensorflow as tf\n",
"import zipfile\n",
"\n",
"from collections import defaultdict\n",
"from io import StringIO\n",
"from matplotlib import pyplot as plt\n",
"from PIL import Image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Env setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# This is needed to display the images.\n",
"%matplotlib inline\n",
"\n",
"# This is needed since the notebook is stored in the object_detection folder.\n",
"sys.path.append(\"..\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Object detection imports\n",
"Here are the imports from the object detection module."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from utils import label_map_util\n",
"\n",
"from utils import visualization_utils as vis_util"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Model preparation "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Variables\n",
"\n",
"Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file. \n",
"\n",
"By default we use an \"SSD with Mobilenet\" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# What model to download.\n",
"MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'\n",
"MODEL_FILE = MODEL_NAME + '.tar.gz'\n",
"DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'\n",
"\n",
"# Path to frozen detection graph. This is the actual model that is used for the object detection.\n",
"PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'\n",
"\n",
"# List of the strings that is used to add correct label for each box.\n",
"PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')\n",
"\n",
"NUM_CLASSES = 90"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"opener = urllib.request.URLopener()\n",
"opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)\n",
"tar_file = tarfile.open(MODEL_FILE)\n",
"for file in tar_file.getmembers():\n",
" file_name = os.path.basename(file.name)\n",
" if 'frozen_inference_graph.pb' in file_name:\n",
" tar_file.extract(file, os.getcwd())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load a (frozen) Tensorflow model into memory."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"detection_graph = tf.Graph()\n",
"with detection_graph.as_default():\n",
" od_graph_def = tf.GraphDef()\n",
" with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:\n",
" serialized_graph = fid.read()\n",
" od_graph_def.ParseFromString(serialized_graph)\n",
" tf.import_graph_def(od_graph_def, name='')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading label map\n",
"Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"label_map = label_map_util.load_labelmap(PATH_TO_LABELS)\n",
"categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)\n",
"category_index = label_map_util.create_category_index(categories)"
]
},
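{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Illustrative sketch (not part of the original notebook): any dict that maps\n",
"# integer class ids to {'id': ..., 'name': ...} entries can stand in for the\n",
"# utility-built category_index above, e.g. for a hypothetical two-class map:\n",
"# manual_category_index = {1: {'id': 1, 'name': 'person'}, 2: {'id': 2, 'name': 'car'}}"
]
},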
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper code"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def load_image_into_numpy_array(image):\n",
" (im_width, im_height) = image.size\n",
" return np.array(image.getdata()).reshape(\n",
" (im_height, im_width, 3)).astype(np.uint8)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Detection"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# For the sake of simplicity we will use only 2 images:\n",
"# image1.jpg\n",
"# image2.jpg\n",
"# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.\n",
"PATH_TO_TEST_IMAGES_DIR = 'test_images'\n",
"TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]\n",
"\n",
"# Size, in inches, of the output images.\n",
"IMAGE_SIZE = (12, 8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"with detection_graph.as_default():\n",
" with tf.Session(graph=detection_graph) as sess:\n",
" for image_path in TEST_IMAGE_PATHS:\n",
" image = Image.open(image_path)\n",
" # the array based representation of the image will be used later in order to prepare the\n",
" # result image with boxes and labels on it.\n",
" image_np = load_image_into_numpy_array(image)\n",
" # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
" image_np_expanded = np.expand_dims(image_np, axis=0)\n",
" image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')\n",
" # Each box represents a part of the image where a particular object was detected.\n",
" boxes = detection_graph.get_tensor_by_name('detection_boxes:0')\n",
" # Each score represent how level of confidence for each of the objects.\n",
" # Score is shown on the result image, together with the class label.\n",
" scores = detection_graph.get_tensor_by_name('detection_scores:0')\n",
" classes = detection_graph.get_tensor_by_name('detection_classes:0')\n",
" num_detections = detection_graph.get_tensor_by_name('num_detections:0')\n",
" # Actual detection.\n",
" (boxes, scores, classes, num_detections) = sess.run(\n",
" [boxes, scores, classes, num_detections],\n",
" feed_dict={image_tensor: image_np_expanded})\n",
" # Visualization of the results of a detection.\n",
" vis_util.visualize_boxes_and_labels_on_image_array(\n",
" image_np,\n",
" np.squeeze(boxes),\n",
" np.squeeze(classes).astype(np.int32),\n",
" np.squeeze(scores),\n",
" category_index,\n",
" use_normalized_coordinates=True,\n",
" line_thickness=8)\n",
" plt.figure(figsize=IMAGE_SIZE)\n",
" plt.imshow(image_np)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
# TensorFlow Object Detection API: Configuration protos.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
proto_library(
name = "argmax_matcher_proto",
srcs = ["argmax_matcher.proto"],
)
py_proto_library(
name = "argmax_matcher_py_pb2",
api_version = 2,
deps = [":argmax_matcher_proto"],
)
proto_library(
name = "bipartite_matcher_proto",
srcs = ["bipartite_matcher.proto"],
)
py_proto_library(
name = "bipartite_matcher_py_pb2",
api_version = 2,
deps = [":bipartite_matcher_proto"],
)
proto_library(
name = "matcher_proto",
srcs = ["matcher.proto"],
deps = [
":argmax_matcher_proto",
":bipartite_matcher_proto",
],
)
py_proto_library(
name = "matcher_py_pb2",
api_version = 2,
deps = [":matcher_proto"],
)
proto_library(
name = "faster_rcnn_box_coder_proto",
srcs = ["faster_rcnn_box_coder.proto"],
)
py_proto_library(
name = "faster_rcnn_box_coder_py_pb2",
api_version = 2,
deps = [":faster_rcnn_box_coder_proto"],
)
proto_library(
name = "mean_stddev_box_coder_proto",
srcs = ["mean_stddev_box_coder.proto"],
)
py_proto_library(
name = "mean_stddev_box_coder_py_pb2",
api_version = 2,
deps = [":mean_stddev_box_coder_proto"],
)
proto_library(
name = "square_box_coder_proto",
srcs = ["square_box_coder.proto"],
)
py_proto_library(
name = "square_box_coder_py_pb2",
api_version = 2,
deps = [":square_box_coder_proto"],
)
proto_library(
name = "box_coder_proto",
srcs = ["box_coder.proto"],
deps = [
":faster_rcnn_box_coder_proto",
":mean_stddev_box_coder_proto",
":square_box_coder_proto",
],
)
py_proto_library(
name = "box_coder_py_pb2",
api_version = 2,
deps = [":box_coder_proto"],
)
proto_library(
name = "grid_anchor_generator_proto",
srcs = ["grid_anchor_generator.proto"],
)
py_proto_library(
name = "grid_anchor_generator_py_pb2",
api_version = 2,
deps = [":grid_anchor_generator_proto"],
)
proto_library(
name = "ssd_anchor_generator_proto",
srcs = ["ssd_anchor_generator.proto"],
)
py_proto_library(
name = "ssd_anchor_generator_py_pb2",
api_version = 2,
deps = [":ssd_anchor_generator_proto"],
)
proto_library(
name = "anchor_generator_proto",
srcs = ["anchor_generator.proto"],
deps = [
":grid_anchor_generator_proto",
":ssd_anchor_generator_proto",
],
)
py_proto_library(
name = "anchor_generator_py_pb2",
api_version = 2,
deps = [":anchor_generator_proto"],
)
proto_library(
name = "input_reader_proto",
srcs = ["input_reader.proto"],
)
py_proto_library(
name = "input_reader_py_pb2",
api_version = 2,
deps = [":input_reader_proto"],
)
proto_library(
name = "losses_proto",
srcs = ["losses.proto"],
)
py_proto_library(
name = "losses_py_pb2",
api_version = 2,
deps = [":losses_proto"],
)
proto_library(
name = "optimizer_proto",
srcs = ["optimizer.proto"],
)
py_proto_library(
name = "optimizer_py_pb2",
api_version = 2,
deps = [":optimizer_proto"],
)
proto_library(
name = "post_processing_proto",
srcs = ["post_processing.proto"],
)
py_proto_library(
name = "post_processing_py_pb2",
api_version = 2,
deps = [":post_processing_proto"],
)
proto_library(
name = "hyperparams_proto",
srcs = ["hyperparams.proto"],
)
py_proto_library(
name = "hyperparams_py_pb2",
api_version = 2,
deps = [":hyperparams_proto"],
)
proto_library(
name = "box_predictor_proto",
srcs = ["box_predictor.proto"],
deps = [":hyperparams_proto"],
)
py_proto_library(
name = "box_predictor_py_pb2",
api_version = 2,
deps = [":box_predictor_proto"],
)
proto_library(
name = "region_similarity_calculator_proto",
srcs = ["region_similarity_calculator.proto"],
deps = [],
)
py_proto_library(
name = "region_similarity_calculator_py_pb2",
api_version = 2,
deps = [":region_similarity_calculator_proto"],
)
proto_library(
name = "preprocessor_proto",
srcs = ["preprocessor.proto"],
)
py_proto_library(
name = "preprocessor_py_pb2",
api_version = 2,
deps = [":preprocessor_proto"],
)
proto_library(
name = "train_proto",
srcs = ["train.proto"],
deps = [
":optimizer_proto",
":preprocessor_proto",
],
)
py_proto_library(
name = "train_py_pb2",
api_version = 2,
deps = [":train_proto"],
)
proto_library(
name = "eval_proto",
srcs = ["eval.proto"],
)
py_proto_library(
name = "eval_py_pb2",
api_version = 2,
deps = [":eval_proto"],
)
proto_library(
name = "image_resizer_proto",
srcs = ["image_resizer.proto"],
)
py_proto_library(
name = "image_resizer_py_pb2",
api_version = 2,
deps = [":image_resizer_proto"],
)
proto_library(
name = "faster_rcnn_proto",
srcs = ["faster_rcnn.proto"],
deps = [
":box_predictor_proto",
"//object_detection/protos:anchor_generator_proto",
"//object_detection/protos:hyperparams_proto",
"//object_detection/protos:image_resizer_proto",
"//object_detection/protos:losses_proto",
"//object_detection/protos:post_processing_proto",
],
)
proto_library(
name = "ssd_proto",
srcs = ["ssd.proto"],
deps = [
":anchor_generator_proto",
":box_coder_proto",
":box_predictor_proto",
":hyperparams_proto",
":image_resizer_proto",
":losses_proto",
":matcher_proto",
":post_processing_proto",
":region_similarity_calculator_proto",
],
)
proto_library(
name = "model_proto",
srcs = ["model.proto"],
deps = [
":faster_rcnn_proto",
":ssd_proto",
],
)
py_proto_library(
name = "model_py_pb2",
api_version = 2,
deps = [":model_proto"],
)
proto_library(
name = "pipeline_proto",
srcs = ["pipeline.proto"],
deps = [
":eval_proto",
":input_reader_proto",
":model_proto",
":train_proto",
],
)
py_proto_library(
name = "pipeline_py_pb2",
api_version = 2,
deps = [":pipeline_proto"],
)
proto_library(
name = "string_int_label_map_proto",
srcs = ["string_int_label_map.proto"],
)
py_proto_library(
name = "string_int_label_map_py_pb2",
api_version = 2,
deps = [":string_int_label_map_proto"],
)
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/grid_anchor_generator.proto";
import "object_detection/protos/ssd_anchor_generator.proto";
// Configuration proto for the anchor generator to use in the object detection
// pipeline. See core/anchor_generator.py for details.
message AnchorGenerator {
oneof anchor_generator_oneof {
GridAnchorGenerator grid_anchor_generator = 1;
SsdAnchorGenerator ssd_anchor_generator = 2;
}
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for ArgMaxMatcher. See
// matchers/argmax_matcher.py for details.
message ArgMaxMatcher {
// Threshold for positive matches.
optional float matched_threshold = 1 [default = 0.5];
// Threshold for negative matches.
optional float unmatched_threshold = 2 [default = 0.5];
// Whether to construct ArgMaxMatcher without thresholds.
optional bool ignore_thresholds = 3 [default = false];
// If True then negative matches are the ones below the unmatched_threshold,
// whereas ignored matches are in between the matched and unmatched
// threshold. If False, then negative matches are in between the matched
// and unmatched threshold, and everything lower than unmatched is ignored.
optional bool negatives_lower_than_unmatched = 4 [default = true];
// Whether to ensure each row is matched to at least one column.
optional bool force_match_for_each_row = 5 [default = false];
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for bipartite matcher. See
// matchers/bipartite_matcher.py for details.
message BipartiteMatcher {
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/faster_rcnn_box_coder.proto";
import "object_detection/protos/mean_stddev_box_coder.proto";
import "object_detection/protos/square_box_coder.proto";
// Configuration proto for the box coder to be used in the object detection
// pipeline. See core/box_coder.py for details.
message BoxCoder {
oneof box_coder_oneof {
FasterRcnnBoxCoder faster_rcnn_box_coder = 1;
MeanStddevBoxCoder mean_stddev_box_coder = 2;
SquareBoxCoder square_box_coder = 3;
}
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/hyperparams.proto";
// Configuration proto for box predictor. See core/box_predictor.py for details.
message BoxPredictor {
oneof box_predictor_oneof {
ConvolutionalBoxPredictor convolutional_box_predictor = 1;
MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2;
RfcnBoxPredictor rfcn_box_predictor = 3;
}
}
// Configuration proto for Convolutional box predictor.
message ConvolutionalBoxPredictor {
// Hyperparameters for convolution ops used in the box predictor.
optional Hyperparams conv_hyperparams = 1;
// Minimum feature depth prior to predicting box encodings and class
// predictions.
optional int32 min_depth = 2 [default = 0];
// Maximum feature depth prior to predicting box encodings and class
// predictions. If max_depth is set to 0, no additional feature map will be
// inserted before location and class predictions.
optional int32 max_depth = 3 [default = 0];
// Number of the additional conv layers before the predictor.
optional int32 num_layers_before_predictor = 4 [default = 0];
// Whether to use dropout for class prediction.
optional bool use_dropout = 5 [default = true];
// Keep probability for dropout
optional float dropout_keep_probability = 6 [default = 0.8];
// Size of final convolution kernel. If the spatial resolution of the feature
// map is smaller than the kernel size, then the kernel size is set to
// min(feature_width, feature_height).
optional int32 kernel_size = 7 [default = 1];
// Size of the encoding for boxes.
optional int32 box_code_size = 8 [default = 4];
// Whether to apply sigmoid to the output of class predictions.
// TODO: Do we need this since we have a post processing module?
optional bool apply_sigmoid_to_scores = 9 [default = false];
}
message MaskRCNNBoxPredictor {
// Hyperparameters for fully connected ops used in the box predictor.
optional Hyperparams fc_hyperparams = 1;
// Whether to use dropout op prior to the both box and class predictions.
optional bool use_dropout = 2 [default= false];
// Keep probability for dropout. This is only used if use_dropout is true.
optional float dropout_keep_probability = 3 [default = 0.5];
// Size of the encoding for the boxes.
optional int32 box_code_size = 4 [default = 4];
// Hyperparameters for convolution ops used in the box predictor.
optional Hyperparams conv_hyperparams = 5;
// Whether to predict instance masks inside detection boxes.
optional bool predict_instance_masks = 6 [default = false];
// The depth for the first conv2d_transpose op applied to the
// image_features in the mask prediction branch.
optional int32 mask_prediction_conv_depth = 7 [default = 256];
// Whether to predict keypoints inside detection boxes.
optional bool predict_keypoints = 8 [default = false];
}
message RfcnBoxPredictor {
// Hyperparameters for convolution ops used in the box predictor.
optional Hyperparams conv_hyperparams = 1;
// Bin sizes for RFCN crops.
optional int32 num_spatial_bins_height = 2 [default = 3];
optional int32 num_spatial_bins_width = 3 [default = 3];
// Target depth to reduce the input image features to.
optional int32 depth = 4 [default=1024];
// Size of the encoding for the boxes.
optional int32 box_code_size = 5 [default = 4];
// Size to resize the rfcn crops to.
optional int32 crop_height = 6 [default= 12];
optional int32 crop_width = 7 [default=12];
}
syntax = "proto2";
package object_detection.protos;
// Message for configuring DetectionModel evaluation jobs (eval.py).
message EvalConfig {
// Number of visualization images to generate.
optional uint32 num_visualizations = 1 [default=10];
// Number of examples to process for evaluation.
optional uint32 num_examples = 2 [default=5000];
// How often to run evaluation.
optional uint32 eval_interval_secs = 3 [default=300];
// Maximum number of times to run evaluation. If set to 0, will run forever.
optional uint32 max_evals = 4 [default=0];
// Whether the TensorFlow graph used for evaluation should be saved to disk.
optional bool save_graph = 5 [default=false];
// Path to directory to store visualizations in. If empty, visualization
// images are not exported (only shown on Tensorboard).
optional string visualization_export_dir = 6 [default=""];
// BNS name of the TensorFlow master.
optional string eval_master = 7 [default=""];
// Type of metrics to use for evaluation. Currently supports only Pascal VOC
// detection metrics.
optional string metrics_set = 8 [default="pascal_voc_metrics"];
// Path to export detections to COCO compatible JSON format.
optional string export_path = 9 [default=""];
// Option to not read groundtruth labels and only export detections to
// COCO-compatible JSON file.
optional bool ignore_groundtruth = 10 [default=false];
// Use exponential moving averages of variables for evaluation.
// TODO: When this is false make sure the model is constructed
// without moving averages in restore_fn.
optional bool use_moving_averages = 11 [default=false];
// Whether to evaluate instance masks.
optional bool eval_instance_masks = 12 [default=false];
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";
// Configuration for Faster R-CNN models.
// See meta_architectures/faster_rcnn_meta_arch.py and models/model_builder.py
//
// Naming conventions:
// Faster R-CNN models have two stages: a first stage region proposal network
// (or RPN) and a second stage box classifier. We thus use the prefixes
// `first_stage_` and `second_stage_` to indicate the stage to which each
// parameter pertains when relevant.
message FasterRcnn {
// Whether to construct only the Region Proposal Network (RPN).
optional bool first_stage_only = 1 [default=false];
// Number of classes to predict.
optional int32 num_classes = 3;
// Image resizer for preprocessing the input image.
optional ImageResizer image_resizer = 4;
// Feature extractor config.
optional FasterRcnnFeatureExtractor feature_extractor = 5;
// (First stage) region proposal network (RPN) parameters.
// Anchor generator to compute RPN anchors.
optional AnchorGenerator first_stage_anchor_generator = 6;
// Atrous rate for the convolution op applied to the
// `first_stage_features_to_crop` tensor to obtain box predictions.
optional int32 first_stage_atrous_rate = 7 [default=1];
// Hyperparameters for the convolutional RPN box predictor.
optional Hyperparams first_stage_box_predictor_conv_hyperparams = 8;
// Kernel size to use for the convolution op just prior to RPN box
// predictions.
optional int32 first_stage_box_predictor_kernel_size = 9 [default=3];
// Output depth for the convolution op just prior to RPN box predictions.
optional int32 first_stage_box_predictor_depth = 10 [default=512];
// The batch size to use for computing the first stage objectness and
// location losses.
optional int32 first_stage_minibatch_size = 11 [default=256];
// Fraction of positive examples per image for the RPN.
optional float first_stage_positive_balance_fraction = 12 [default=0.5];
// Non max suppression score threshold applied to first stage RPN proposals.
optional float first_stage_nms_score_threshold = 13 [default=0.0];
// Non max suppression IOU threshold applied to first stage RPN proposals.
optional float first_stage_nms_iou_threshold = 14 [default=0.7];
// Maximum number of RPN proposals retained after first stage postprocessing.
optional int32 first_stage_max_proposals = 15 [default=300];
// First stage RPN localization loss weight.
optional float first_stage_localization_loss_weight = 16 [default=1.0];
// First stage RPN objectness loss weight.
optional float first_stage_objectness_loss_weight = 17 [default=1.0];
// Per-region cropping parameters.
// Note that if an R-FCN model is constructed the per region cropping
// parameters below are ignored.
// Output size (width and height are set to be the same) of the initial
// bilinear interpolation based cropping during ROI pooling.
optional int32 initial_crop_size = 18;
// Kernel size of the max pool op on the cropped feature map during
// ROI pooling.
optional int32 maxpool_kernel_size = 19;
// Stride of the max pool op on the cropped feature map during ROI pooling.
optional int32 maxpool_stride = 20;
// (Second stage) box classifier parameters
// Hyperparameters for the second stage box predictor. If box predictor type
// is set to rfcn_box_predictor, an R-FCN model is constructed, otherwise a
// Faster R-CNN model is constructed.
optional BoxPredictor second_stage_box_predictor = 21;
// The batch size per image used for computing the classification and refined
// location loss of the box classifier.
// Note that this field is ignored if `hard_example_miner` is configured.
optional int32 second_stage_batch_size = 22 [default=64];
// Fraction of positive examples to use per image for the box classifier.
optional float second_stage_balance_fraction = 23 [default=0.25];
// Post processing to apply on the second stage box classifier predictions.
// Note: the `score_converter` provided to the FasterRCNNMetaArch constructor
// is taken from this `second_stage_post_processing` proto.
optional PostProcessing second_stage_post_processing = 24;
// Second stage refined localization loss weight.
optional float second_stage_localization_loss_weight = 25 [default=1.0];
// Second stage classification loss weight
optional float second_stage_classification_loss_weight = 26 [default=1.0];
// If not left to default, applies hard example mining.
optional HardExampleMiner hard_example_miner = 27;
}
message FasterRcnnFeatureExtractor {
// Type of Faster R-CNN model (e.g., 'faster_rcnn_resnet101';
// See models/model_builder.py for expected types).
optional string type = 1;
// Output stride of extracted RPN feature map.
optional int32 first_stage_features_stride = 2 [default=16];
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for FasterRCNNBoxCoder. See
// box_coders/faster_rcnn_box_coder.py for details.
message FasterRcnnBoxCoder {
// Scale factor for anchor encoded box center.
optional float y_scale = 1 [default = 10.0];
optional float x_scale = 2 [default = 10.0];
// Scale factor for anchor encoded box height.
optional float height_scale = 3 [default = 5.0];
// Scale factor for anchor encoded box width.
optional float width_scale = 4 [default = 5.0];
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for GridAnchorGenerator. See
// anchor_generators/grid_anchor_generator.py for details.
message GridAnchorGenerator {
// Anchor height in pixels.
optional int32 height = 1 [default = 256];
// Anchor width in pixels.
optional int32 width = 2 [default = 256];
// Anchor stride in height dimension in pixels.
optional int32 height_stride = 3 [default = 16];
// Anchor stride in width dimension in pixels.
optional int32 width_stride = 4 [default = 16];
// Anchor height offset in pixels.
optional int32 height_offset = 5 [default = 0];
// Anchor width offset in pixels.
optional int32 width_offset = 6 [default = 0];
// At any given location, len(scales) * len(aspect_ratios) anchors are
// generated with all possible combinations of scales and aspect ratios.
// List of scales for the anchors.
repeated float scales = 7;
// List of aspect ratios for the anchors.
repeated float aspect_ratios = 8;
}