Commit f282f6ef authored by Alexander Gorban

Merge branch 'master' of github.com:tensorflow/models

parents 58a5da7b a2970b03
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for feature map generators."""
import tensorflow as tf
from object_detection.models import feature_map_generators
INCEPTION_V2_LAYOUT = {
'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 256],
'anchor_strides': [16, 32, 64, -1, -1, -1],
'layer_target_norm': [20.0, -1, -1, -1, -1, -1],
}
INCEPTION_V3_LAYOUT = {
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128],
'anchor_strides': [16, 32, 64, -1, -1, -1],
'aspect_ratios': [1.0, 2.0, 1.0/2, 3.0, 1.0/3]
}
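# Layout convention (inferred from the expected shapes in the tests below, not
# from the generator's own documentation): an empty string in 'from_layer'
# means a new feature map is created on top of the previous one with a
# stride-2 3x3 convolution (hence names like 'Mixed_5c_2_Conv2d_3_3x3_s2_512'
# in the expected outputs), and a 'layer_depth' of -1 means the depth of the
# named source layer is kept as-is.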
# TODO: add tests with different anchor strides.
class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes_with_inception_v2(self):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=INCEPTION_V2_LAYOUT,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
expected_feature_map_shapes = {
'Mixed_3c': (4, 28, 28, 256),
'Mixed_4c': (4, 14, 14, 576),
'Mixed_5c': (4, 7, 7, 1024),
'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_inception_v3(self):
image_features = {
'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=INCEPTION_V3_LAYOUT,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
expected_feature_map_shapes = {
'Mixed_5d': (4, 35, 35, 256),
'Mixed_6e': (4, 17, 17, 576),
'Mixed_7c': (4, 8, 8, 1024),
'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128)}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
class GetDepthFunctionTest(tf.test.TestCase):
def test_return_min_depth_when_multiplier_is_small(self):
depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5,
min_depth=16)
self.assertEqual(depth_fn(16), 16)
def test_return_correct_depth_with_multiplier(self):
depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5,
min_depth=16)
self.assertEqual(depth_fn(64), 32)
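# The two cases above are consistent with a depth function of the form
# max(int(depth * depth_multiplier), min_depth) -- a sketch of what
# feature_map_generators.get_depth_fn presumably returns, not a verbatim copy:
#   depth_fn(16) -> max(int(16 * 0.5), 16) = 16
#   depth_fn(64) -> max(int(64 * 0.5), 16) = 32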
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base test class SSDFeatureExtractors."""
from abc import abstractmethod
import numpy as np
import tensorflow as tf
class SsdFeatureExtractorTestBase(object):
def _validate_features_shape(self,
feature_extractor,
preprocessed_inputs,
expected_feature_map_shapes):
"""Checks the extracted features are of correct shape.
Args:
feature_extractor: The feature extractor to test.
preprocessed_inputs: A [batch, height, width, 3] tensor to extract
features with.
expected_feature_map_shapes: The expected shapes of the extracted features.
"""
feature_maps = feature_extractor.extract_features(preprocessed_inputs)
feature_map_shapes = [tf.shape(feature_map) for feature_map in feature_maps]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
feature_map_shapes_out = sess.run(feature_map_shapes)
for shape_out, exp_shape_out in zip(
feature_map_shapes_out, expected_feature_map_shapes):
self.assertAllEqual(shape_out, exp_shape_out)
@abstractmethod
def _create_feature_extractor(self, depth_multiplier):
"""Constructs a new feature extractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
pass
def check_extract_features_returns_correct_shape(
self,
image_height,
image_width,
depth_multiplier,
expected_feature_map_shapes_out):
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_inputs = tf.random_uniform(
[4, image_height, image_width, 3], dtype=tf.float32)
self._validate_features_shape(
feature_extractor, preprocessed_inputs, expected_feature_map_shapes_out)
def check_extract_features_raises_error_with_invalid_image_size(
self,
image_height,
image_width,
depth_multiplier):
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
feature_maps = feature_extractor.extract_features(preprocessed_inputs)
test_preprocessed_image = np.random.rand(4, image_height, image_width, 3)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
with self.assertRaises(tf.errors.InvalidArgumentError):
sess.run(feature_maps,
feed_dict={preprocessed_inputs: test_preprocessed_image})
def check_feature_extractor_variables_under_scope(self,
depth_multiplier,
scope_name):
g = tf.Graph()
with g.as_default():
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
feature_extractor.extract_features(preprocessed_inputs)
variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
for variable in variables:
self.assertTrue(variable.name.startswith(scope_name))
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for InceptionV2 features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from nets import inception_v2
slim = tf.contrib.slim
class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using InceptionV2 features."""
def __init__(self,
depth_multiplier,
min_depth,
conv_hyperparams,
reuse_weights=None):
"""InceptionV2 Feature Extractor for SSD Models.
Args:
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
reuse_weights: Whether to reuse variables. Default is None.
"""
super(SSDInceptionV2FeatureExtractor, self).__init__(
depth_multiplier, min_depth, conv_hyperparams, reuse_weights)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
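# Quick check of the mapping above: pixel value 0 -> -1.0, 127.5 -> 0.0 and
# 255 -> 1.0, i.e. inputs in [0, 255] land in [-1, 1] as the docstring states.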
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must be at least 33 in both height and width.'])
feature_map_layout = {
'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
}
with tf.control_dependencies([shape_assert]):
with slim.arg_scope(self._conv_hyperparams):
with tf.variable_scope('InceptionV2',
reuse=self._reuse_weights) as scope:
_, image_features = inception_v2.inception_v2_base(
preprocessed_inputs,
final_endpoint='Mixed_5c',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
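# Usage sketch (mirrors the accompanying test rather than prescribing an API):
# conv_hyperparams is whatever slim.arg_scope accepts; the tests simply pass an
# empty dict.
#
#   extractor = SSDInceptionV2FeatureExtractor(
#       depth_multiplier=1.0, min_depth=32, conv_hyperparams={})
#   images = tf.placeholder(tf.float32, [4, 300, 300, 3])
#   feature_maps = extractor.extract_features(extractor.preprocess(images))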
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.models.ssd_inception_v2_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_inception_v2_feature_extractor
class SsdInceptionV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase,
tf.test.TestCase):
def _create_feature_extractor(self, depth_multiplier):
"""Constructs a SsdInceptionV2FeatureExtractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
Returns:
an ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor.
"""
min_depth = 32
conv_hyperparams = {}
return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
depth_multiplier, min_depth, conv_hyperparams)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 8, 8, 576), (4, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 19, 19, 576), (4, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
expected_feature_map_shape = [(4, 19, 19, 128), (4, 10, 10, 128),
(4, 5, 5, 32), (4, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self):
image_height = 32
image_width = 32
depth_multiplier = 1.0
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier)
def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
test_image = np.random.rand(4, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
scope_name = 'InceptionV2'
self.check_feature_extractor_variables_under_scope(depth_multiplier,
scope_name)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for MobilenetV1 features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from nets import mobilenet_v1
slim = tf.contrib.slim
class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using MobilenetV1 features."""
def __init__(self,
depth_multiplier,
min_depth,
conv_hyperparams,
reuse_weights=None):
"""MobileNetV1 Feature Extractor for SSD Models.
Args:
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
reuse_weights: Whether to reuse variables. Default is None.
"""
super(SSDMobileNetV1FeatureExtractor, self).__init__(
depth_multiplier, min_depth, conv_hyperparams, reuse_weights)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must be at least 33 in both height and width.'])
feature_map_layout = {
'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
'', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
}
with tf.control_dependencies([shape_assert]):
with slim.arg_scope(self._conv_hyperparams):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base(
preprocessed_inputs,
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobilenet_v1_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v1_feature_extractor
class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
def _create_feature_extractor(self, depth_multiplier):
"""Constructs a new feature extractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
conv_hyperparams = {}
return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
depth_multiplier, min_depth, conv_hyperparams)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 8, 8, 512), (4, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 19, 19, 512), (4, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
expected_feature_map_shape = [(4, 19, 19, 32), (4, 10, 10, 32),
(4, 5, 5, 32), (4, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self):
image_height = 32
image_width = 32
depth_multiplier = 1.0
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier)
def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
test_image = np.random.rand(4, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
scope_name = 'MobilenetV1'
self.check_feature_extractor_variables_under_scope(depth_multiplier,
scope_name)
if __name__ == '__main__':
tf.test.main()
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Object Detection Demo\n",
"Welcome to the object detection inference walkthrough! This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/installation.md) before you start."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import os\n",
"import six.moves.urllib as urllib\n",
"import sys\n",
"import tarfile\n",
"import tensorflow as tf\n",
"import zipfile\n",
"\n",
"from collections import defaultdict\n",
"from io import StringIO\n",
"from matplotlib import pyplot as plt\n",
"from PIL import Image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Env setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# This is needed to display the images.\n",
"%matplotlib inline\n",
"\n",
"# This is needed since the notebook is stored in the object_detection folder.\n",
"sys.path.append(\"..\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Object detection imports\n",
"Here are the imports from the object detection module."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from utils import label_map_util\n",
"\n",
"from utils import visualization_utils as vis_util"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Model preparation "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Variables\n",
"\n",
"Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file. \n",
"\n",
"By default we use an \"SSD with Mobilenet\" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# What model to download.\n",
"MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'\n",
"MODEL_FILE = MODEL_NAME + '.tar.gz'\n",
"DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'\n",
"\n",
"# Path to frozen detection graph. This is the actual model that is used for the object detection.\n",
"PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'\n",
"\n",
"# List of the strings that is used to add correct label for each box.\n",
"PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')\n",
"\n",
"NUM_CLASSES = 90"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"opener = urllib.request.URLopener()\n",
"opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)\n",
"tar_file = tarfile.open(MODEL_FILE)\n",
"for file in tar_file.getmembers():\n",
" file_name = os.path.basename(file.name)\n",
" if 'frozen_inference_graph.pb' in file_name:\n",
" tar_file.extract(file, os.getcwd())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load a (frozen) Tensorflow model into memory."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"detection_graph = tf.Graph()\n",
"with detection_graph.as_default():\n",
" od_graph_def = tf.GraphDef()\n",
" with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:\n",
" serialized_graph = fid.read()\n",
" od_graph_def.ParseFromString(serialized_graph)\n",
" tf.import_graph_def(od_graph_def, name='')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading label map\n",
"Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"label_map = label_map_util.load_labelmap(PATH_TO_LABELS)\n",
"categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)\n",
"category_index = label_map_util.create_category_index(categories)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper code"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def load_image_into_numpy_array(image):\n",
" (im_width, im_height) = image.size\n",
" return np.array(image.getdata()).reshape(\n",
" (im_height, im_width, 3)).astype(np.uint8)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Detection"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# For the sake of simplicity we will use only 2 images:\n",
"# image1.jpg\n",
"# image2.jpg\n",
"# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.\n",
"PATH_TO_TEST_IMAGES_DIR = 'test_images'\n",
"TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]\n",
"\n",
"# Size, in inches, of the output images.\n",
"IMAGE_SIZE = (12, 8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"with detection_graph.as_default():\n",
" with tf.Session(graph=detection_graph) as sess:\n",
" for image_path in TEST_IMAGE_PATHS:\n",
" image = Image.open(image_path)\n",
" # the array based representation of the image will be used later in order to prepare the\n",
" # result image with boxes and labels on it.\n",
" image_np = load_image_into_numpy_array(image)\n",
" # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
" image_np_expanded = np.expand_dims(image_np, axis=0)\n",
" image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')\n",
" # Each box represents a part of the image where a particular object was detected.\n",
" boxes = detection_graph.get_tensor_by_name('detection_boxes:0')\n",
" # Each score represent how level of confidence for each of the objects.\n",
" # Score is shown on the result image, together with the class label.\n",
" scores = detection_graph.get_tensor_by_name('detection_scores:0')\n",
" classes = detection_graph.get_tensor_by_name('detection_classes:0')\n",
" num_detections = detection_graph.get_tensor_by_name('num_detections:0')\n",
" # Actual detection.\n",
" (boxes, scores, classes, num_detections) = sess.run(\n",
" [boxes, scores, classes, num_detections],\n",
" feed_dict={image_tensor: image_np_expanded})\n",
" # Visualization of the results of a detection.\n",
" vis_util.visualize_boxes_and_labels_on_image_array(\n",
" image_np,\n",
" np.squeeze(boxes),\n",
" np.squeeze(classes).astype(np.int32),\n",
" np.squeeze(scores),\n",
" category_index,\n",
" use_normalized_coordinates=True,\n",
" line_thickness=8)\n",
" plt.figure(figsize=IMAGE_SIZE)\n",
" plt.imshow(image_np)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
# Tensorflow Object Detection API: Configuration protos.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
proto_library(
name = "argmax_matcher_proto",
srcs = ["argmax_matcher.proto"],
)
py_proto_library(
name = "argmax_matcher_py_pb2",
api_version = 2,
deps = [":argmax_matcher_proto"],
)
proto_library(
name = "bipartite_matcher_proto",
srcs = ["bipartite_matcher.proto"],
)
py_proto_library(
name = "bipartite_matcher_py_pb2",
api_version = 2,
deps = [":bipartite_matcher_proto"],
)
proto_library(
name = "matcher_proto",
srcs = ["matcher.proto"],
deps = [
":argmax_matcher_proto",
":bipartite_matcher_proto",
],
)
py_proto_library(
name = "matcher_py_pb2",
api_version = 2,
deps = [":matcher_proto"],
)
proto_library(
name = "faster_rcnn_box_coder_proto",
srcs = ["faster_rcnn_box_coder.proto"],
)
py_proto_library(
name = "faster_rcnn_box_coder_py_pb2",
api_version = 2,
deps = [":faster_rcnn_box_coder_proto"],
)
proto_library(
name = "mean_stddev_box_coder_proto",
srcs = ["mean_stddev_box_coder.proto"],
)
py_proto_library(
name = "mean_stddev_box_coder_py_pb2",
api_version = 2,
deps = [":mean_stddev_box_coder_proto"],
)
proto_library(
name = "square_box_coder_proto",
srcs = ["square_box_coder.proto"],
)
py_proto_library(
name = "square_box_coder_py_pb2",
api_version = 2,
deps = [":square_box_coder_proto"],
)
proto_library(
name = "box_coder_proto",
srcs = ["box_coder.proto"],
deps = [
":faster_rcnn_box_coder_proto",
":mean_stddev_box_coder_proto",
":square_box_coder_proto",
],
)
py_proto_library(
name = "box_coder_py_pb2",
api_version = 2,
deps = [":box_coder_proto"],
)
proto_library(
name = "grid_anchor_generator_proto",
srcs = ["grid_anchor_generator.proto"],
)
py_proto_library(
name = "grid_anchor_generator_py_pb2",
api_version = 2,
deps = [":grid_anchor_generator_proto"],
)
proto_library(
name = "ssd_anchor_generator_proto",
srcs = ["ssd_anchor_generator.proto"],
)
py_proto_library(
name = "ssd_anchor_generator_py_pb2",
api_version = 2,
deps = [":ssd_anchor_generator_proto"],
)
proto_library(
name = "anchor_generator_proto",
srcs = ["anchor_generator.proto"],
deps = [
":grid_anchor_generator_proto",
":ssd_anchor_generator_proto",
],
)
py_proto_library(
name = "anchor_generator_py_pb2",
api_version = 2,
deps = [":anchor_generator_proto"],
)
proto_library(
name = "input_reader_proto",
srcs = ["input_reader.proto"],
)
py_proto_library(
name = "input_reader_py_pb2",
api_version = 2,
deps = [":input_reader_proto"],
)
proto_library(
name = "losses_proto",
srcs = ["losses.proto"],
)
py_proto_library(
name = "losses_py_pb2",
api_version = 2,
deps = [":losses_proto"],
)
proto_library(
name = "optimizer_proto",
srcs = ["optimizer.proto"],
)
py_proto_library(
name = "optimizer_py_pb2",
api_version = 2,
deps = [":optimizer_proto"],
)
proto_library(
name = "post_processing_proto",
srcs = ["post_processing.proto"],
)
py_proto_library(
name = "post_processing_py_pb2",
api_version = 2,
deps = [":post_processing_proto"],
)
proto_library(
name = "hyperparams_proto",
srcs = ["hyperparams.proto"],
)
py_proto_library(
name = "hyperparams_py_pb2",
api_version = 2,
deps = [":hyperparams_proto"],
)
proto_library(
name = "box_predictor_proto",
srcs = ["box_predictor.proto"],
deps = [":hyperparams_proto"],
)
py_proto_library(
name = "box_predictor_py_pb2",
api_version = 2,
deps = [":box_predictor_proto"],
)
proto_library(
name = "region_similarity_calculator_proto",
srcs = ["region_similarity_calculator.proto"],
deps = [],
)
py_proto_library(
name = "region_similarity_calculator_py_pb2",
api_version = 2,
deps = [":region_similarity_calculator_proto"],
)
proto_library(
name = "preprocessor_proto",
srcs = ["preprocessor.proto"],
)
py_proto_library(
name = "preprocessor_py_pb2",
api_version = 2,
deps = [":preprocessor_proto"],
)
proto_library(
name = "train_proto",
srcs = ["train.proto"],
deps = [
":optimizer_proto",
":preprocessor_proto",
],
)
py_proto_library(
name = "train_py_pb2",
api_version = 2,
deps = [":train_proto"],
)
proto_library(
name = "eval_proto",
srcs = ["eval.proto"],
)
py_proto_library(
name = "eval_py_pb2",
api_version = 2,
deps = [":eval_proto"],
)
proto_library(
name = "image_resizer_proto",
srcs = ["image_resizer.proto"],
)
py_proto_library(
name = "image_resizer_py_pb2",
api_version = 2,
deps = [":image_resizer_proto"],
)
proto_library(
name = "faster_rcnn_proto",
srcs = ["faster_rcnn.proto"],
deps = [
":box_predictor_proto",
"//object_detection/protos:anchor_generator_proto",
"//object_detection/protos:hyperparams_proto",
"//object_detection/protos:image_resizer_proto",
"//object_detection/protos:losses_proto",
"//object_detection/protos:post_processing_proto",
],
)
proto_library(
name = "ssd_proto",
srcs = ["ssd.proto"],
deps = [
":anchor_generator_proto",
":box_coder_proto",
":box_predictor_proto",
":hyperparams_proto",
":image_resizer_proto",
":losses_proto",
":matcher_proto",
":post_processing_proto",
":region_similarity_calculator_proto",
],
)
proto_library(
name = "model_proto",
srcs = ["model.proto"],
deps = [
":faster_rcnn_proto",
":ssd_proto",
],
)
py_proto_library(
name = "model_py_pb2",
api_version = 2,
deps = [":model_proto"],
)
proto_library(
name = "pipeline_proto",
srcs = ["pipeline.proto"],
deps = [
":eval_proto",
":input_reader_proto",
":model_proto",
":train_proto",
],
)
py_proto_library(
name = "pipeline_py_pb2",
api_version = 2,
deps = [":pipeline_proto"],
)
proto_library(
name = "string_int_label_map_proto",
srcs = ["string_int_label_map.proto"],
)
py_proto_library(
name = "string_int_label_map_py_pb2",
api_version = 2,
deps = [":string_int_label_map_proto"],
)
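# Example (sketch): Python code elsewhere in the repository depends on the
# *_py_pb2 targets above via py_library deps such as ":pipeline_py_pb2" and
# imports the generated modules, e.g.
#   from object_detection.protos import pipeline_pb2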
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/grid_anchor_generator.proto";
import "object_detection/protos/ssd_anchor_generator.proto";
// Configuration proto for the anchor generator to use in the object detection
// pipeline. See core/anchor_generator.py for details.
message AnchorGenerator {
oneof anchor_generator_oneof {
GridAnchorGenerator grid_anchor_generator = 1;
SsdAnchorGenerator ssd_anchor_generator = 2;
}
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for ArgMaxMatcher. See
// matchers/argmax_matcher.py for details.
message ArgMaxMatcher {
// Threshold for positive matches.
optional float matched_threshold = 1 [default = 0.5];
// Threshold for negative matches.
optional float unmatched_threshold = 2 [default = 0.5];
// Whether to construct ArgMaxMatcher without thresholds.
optional bool ignore_thresholds = 3 [default = false];
// If True then negative matches are the ones below the unmatched_threshold,
// whereas ignored matches are in between the matched and unmatched
// threshold. If False, then negative matches are in between the matched
// and unmatched threshold, and everything lower than unmatched is ignored.
optional bool negatives_lower_than_unmatched = 4 [default = true];
// Whether to ensure each row is matched to at least one column.
optional bool force_match_for_each_row = 5 [default = false];
}
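// Concrete reading of the flags above (illustrative numbers): with
// matched_threshold = 0.7, unmatched_threshold = 0.3 and
// negatives_lower_than_unmatched = true, an anchor with IOU >= 0.7 is a
// match, IOU < 0.3 is a negative, and anything in between is ignored;
// flipping negatives_lower_than_unmatched swaps the negative and ignored
// bands.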
syntax = "proto2";
package object_detection.protos;
// Configuration proto for bipartite matcher. See
// matchers/bipartite_matcher.py for details.
message BipartiteMatcher {
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/faster_rcnn_box_coder.proto";
import "object_detection/protos/mean_stddev_box_coder.proto";
import "object_detection/protos/square_box_coder.proto";
// Configuration proto for the box coder to be used in the object detection
// pipeline. See core/box_coder.py for details.
message BoxCoder {
oneof box_coder_oneof {
FasterRcnnBoxCoder faster_rcnn_box_coder = 1;
MeanStddevBoxCoder mean_stddev_box_coder = 2;
SquareBoxCoder square_box_coder = 3;
}
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/hyperparams.proto";
// Configuration proto for box predictor. See core/box_predictor.py for details.
message BoxPredictor {
oneof box_predictor_oneof {
ConvolutionalBoxPredictor convolutional_box_predictor = 1;
MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2;
RfcnBoxPredictor rfcn_box_predictor = 3;
}
}
// Configuration proto for Convolutional box predictor.
message ConvolutionalBoxPredictor {
// Hyperparameters for convolution ops used in the box predictor.
optional Hyperparams conv_hyperparams = 1;
// Minimum feature depth prior to predicting box encodings and class
// predictions.
optional int32 min_depth = 2 [default = 0];
// Maximum feature depth prior to predicting box encodings and class
// predictions. If max_depth is set to 0, no additional feature map will be
// inserted before location and class predictions.
optional int32 max_depth = 3 [default = 0];
// Number of additional conv layers before the predictor.
optional int32 num_layers_before_predictor = 4 [default = 0];
// Whether to use dropout for class prediction.
optional bool use_dropout = 5 [default = true];
// Keep probability for dropout
optional float dropout_keep_probability = 6 [default = 0.8];
// Size of final convolution kernel. If the spatial resolution of the feature
// map is smaller than the kernel size, then the kernel size is set to
// min(feature_width, feature_height).
optional int32 kernel_size = 7 [default = 1];
// Size of the encoding for boxes.
optional int32 box_code_size = 8 [default = 4];
// Whether to apply sigmoid to the output of class predictions.
// TODO: Do we need this since we have a post processing module?
optional bool apply_sigmoid_to_scores = 9 [default = false];
}
message MaskRCNNBoxPredictor {
// Hyperparameters for fully connected ops used in the box predictor.
optional Hyperparams fc_hyperparams = 1;
// Whether to use dropout op prior to the both box and class predictions.
optional bool use_dropout = 2 [default= false];
// Keep probability for dropout. This is only used if use_dropout is true.
optional float dropout_keep_probability = 3 [default = 0.5];
// Size of the encoding for the boxes.
optional int32 box_code_size = 4 [default = 4];
// Hyperparameters for convolution ops used in the box predictor.
optional Hyperparams conv_hyperparams = 5;
// Whether to predict instance masks inside detection boxes.
optional bool predict_instance_masks = 6 [default = false];
// The depth for the first conv2d_transpose op applied to the
// image_features in the mask prediction branch.
optional int32 mask_prediction_conv_depth = 7 [default = 256];
// Whether to predict keypoints inside detection boxes.
optional bool predict_keypoints = 8 [default = false];
}
message RfcnBoxPredictor {
// Hyperparameters for convolution ops used in the box predictor.
optional Hyperparams conv_hyperparams = 1;
// Bin sizes for RFCN crops.
optional int32 num_spatial_bins_height = 2 [default = 3];
optional int32 num_spatial_bins_width = 3 [default = 3];
// Target depth to reduce the input image features to.
optional int32 depth = 4 [default=1024];
// Size of the encoding for the boxes.
optional int32 box_code_size = 5 [default = 4];
// Size to resize the rfcn crops to.
optional int32 crop_height = 6 [default= 12];
optional int32 crop_width = 7 [default=12];
}
syntax = "proto2";
package object_detection.protos;
// Message for configuring DetectionModel evaluation jobs (eval.py).
message EvalConfig {
// Number of visualization images to generate.
optional uint32 num_visualizations = 1 [default=10];
// Number of examples to process for evaluation.
optional uint32 num_examples = 2 [default=5000];
// How often to run evaluation.
optional uint32 eval_interval_secs = 3 [default=300];
// Maximum number of times to run evaluation. If set to 0, will run forever.
optional uint32 max_evals = 4 [default=0];
// Whether the TensorFlow graph used for evaluation should be saved to disk.
optional bool save_graph = 5 [default=false];
// Path to directory to store visualizations in. If empty, visualization
// images are not exported (only shown on Tensorboard).
optional string visualization_export_dir = 6 [default=""];
// BNS name of the TensorFlow master.
optional string eval_master = 7 [default=""];
// Type of metrics to use for evaluation. Currently supports only Pascal VOC
// detection metrics.
optional string metrics_set = 8 [default="pascal_voc_metrics"];
// Path to export detections to COCO compatible JSON format.
optional string export_path = 9 [default=""];
// Option to not read groundtruth labels and only export detections to
// COCO-compatible JSON file.
optional bool ignore_groundtruth = 10 [default=false];
// Use exponential moving averages of variables for evaluation.
// TODO: When this is false make sure the model is constructed
// without moving averages in restore_fn.
optional bool use_moving_averages = 11 [default=false];
// Whether to evaluate instance masks.
optional bool eval_instance_masks = 12 [default=false];
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";
// Configuration for Faster R-CNN models.
// See meta_architectures/faster_rcnn_meta_arch.py and models/model_builder.py
//
// Naming conventions:
// Faster R-CNN models have two stages: a first stage region proposal network
// (or RPN) and a second stage box classifier. We thus use the prefixes
// `first_stage_` and `second_stage_` to indicate the stage to which each
// parameter pertains when relevant.
message FasterRcnn {
// Whether to construct only the Region Proposal Network (RPN).
optional bool first_stage_only = 1 [default=false];
// Number of classes to predict.
optional int32 num_classes = 3;
// Image resizer for preprocessing the input image.
optional ImageResizer image_resizer = 4;
// Feature extractor config.
optional FasterRcnnFeatureExtractor feature_extractor = 5;
// (First stage) region proposal network (RPN) parameters.
// Anchor generator to compute RPN anchors.
optional AnchorGenerator first_stage_anchor_generator = 6;
// Atrous rate for the convolution op applied to the
// `first_stage_features_to_crop` tensor to obtain box predictions.
optional int32 first_stage_atrous_rate = 7 [default=1];
// Hyperparameters for the convolutional RPN box predictor.
optional Hyperparams first_stage_box_predictor_conv_hyperparams = 8;
// Kernel size to use for the convolution op just prior to RPN box
// predictions.
optional int32 first_stage_box_predictor_kernel_size = 9 [default=3];
// Output depth for the convolution op just prior to RPN box predictions.
optional int32 first_stage_box_predictor_depth = 10 [default=512];
// The batch size to use for computing the first stage objectness and
// location losses.
optional int32 first_stage_minibatch_size = 11 [default=256];
// Fraction of positive examples per image for the RPN.
optional float first_stage_positive_balance_fraction = 12 [default=0.5];
// Non max suppression score threshold applied to first stage RPN proposals.
optional float first_stage_nms_score_threshold = 13 [default=0.0];
// Non max suppression IOU threshold applied to first stage RPN proposals.
optional float first_stage_nms_iou_threshold = 14 [default=0.7];
// Maximum number of RPN proposals retained after first stage postprocessing.
optional int32 first_stage_max_proposals = 15 [default=300];
// First stage RPN localization loss weight.
optional float first_stage_localization_loss_weight = 16 [default=1.0];
// First stage RPN objectness loss weight.
optional float first_stage_objectness_loss_weight = 17 [default=1.0];
// Per-region cropping parameters.
// Note that if an R-FCN model is constructed, the per-region cropping
// parameters below are ignored.
// Output size (width and height are set to be the same) of the initial
// bilinear interpolation based cropping during ROI pooling.
optional int32 initial_crop_size = 18;
// Kernel size of the max pool op on the cropped feature map during
// ROI pooling.
optional int32 maxpool_kernel_size = 19;
// Stride of the max pool op on the cropped feature map during ROI pooling.
optional int32 maxpool_stride = 20;
// (Second stage) box classifier parameters
// Hyperparameters for the second stage box predictor. If box predictor type
// is set to rfcn_box_predictor, an R-FCN model is constructed; otherwise a
// Faster R-CNN model is constructed.
optional BoxPredictor second_stage_box_predictor = 21;
// The batch size per image used for computing the classification and refined
// location loss of the box classifier.
// Note that this field is ignored if `hard_example_miner` is configured.
optional int32 second_stage_batch_size = 22 [default=64];
// Fraction of positive examples to use per image for the box classifier.
optional float second_stage_balance_fraction = 23 [default=0.25];
// Post processing to apply on the second stage box classifier predictions.
// Note: the `score_converter` provided to the FasterRCNNMetaArch constructor
// is taken from this `second_stage_post_processing` proto.
optional PostProcessing second_stage_post_processing = 24;
// Second stage refined localization loss weight.
optional float second_stage_localization_loss_weight = 25 [default=1.0];
// Second stage classification loss weight
optional float second_stage_classification_loss_weight = 26 [default=1.0];
// If not left to default, applies hard example mining.
optional HardExampleMiner hard_example_miner = 27;
}
message FasterRcnnFeatureExtractor {
// Type of Faster R-CNN model (e.g., 'faster_rcnn_resnet101';
// See models/model_builder.py for expected types).
optional string type = 1;
// Output stride of extracted RPN feature map.
optional int32 first_stage_features_stride = 2 [default=16];
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for FasterRCNNBoxCoder. See
// box_coders/faster_rcnn_box_coder.py for details.
message FasterRcnnBoxCoder {
// Scale factor for anchor encoded box center.
optional float y_scale = 1 [default = 10.0];
optional float x_scale = 2 [default = 10.0];
// Scale factor for anchor encoded box height.
optional float height_scale = 3 [default = 5.0];
// Scale factor for anchor encoded box width.
optional float width_scale = 4 [default = 5.0];
}
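// The four scales above multiply the encoded box targets. As a sketch of the
// encoding in box_coders/faster_rcnn_box_coder.py, given an anchor with
// center (xa, ya) and size (wa, ha) and a box with center (x, y) and size
// (w, h):
//   ty = y_scale * (y - ya) / ha
//   tx = x_scale * (x - xa) / wa
//   th = height_scale * log(h / ha)
//   tw = width_scale  * log(w / wa)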
syntax = "proto2";
package object_detection.protos;
// Configuration proto for GridAnchorGenerator. See
// anchor_generators/grid_anchor_generator.py for details.
message GridAnchorGenerator {
// Anchor height in pixels.
optional int32 height = 1 [default = 256];
// Anchor width in pixels.
optional int32 width = 2 [default = 256];
// Anchor stride in height dimension in pixels.
optional int32 height_stride = 3 [default = 16];
// Anchor stride in width dimension in pixels.
optional int32 width_stride = 4 [default = 16];
// Anchor height offset in pixels.
optional int32 height_offset = 5 [default = 0];
// Anchor width offset in pixels.
optional int32 width_offset = 6 [default = 0];
// At any given location, len(scales) * len(aspect_ratios) anchors are
// generated with all possible combinations of scales and aspect ratios.
// List of scales for the anchors.
repeated float scales = 7;
// List of aspect ratios for the anchors.
repeated float aspect_ratios = 8;
}
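// Example (sketch, not taken from a shipped config): three scales and three
// aspect ratios yield 3 * 3 = 9 anchors at every grid location:
//   grid_anchor_generator {
//     scales: [0.25, 0.5, 1.0]
//     aspect_ratios: [0.5, 1.0, 2.0]
//   }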
syntax = "proto2";
package object_detection.protos;
// Configuration proto for the convolution op hyperparameters to use in the
// object detection pipeline.
message Hyperparams {
// Operations affected by hyperparameters.
enum Op {
// Convolution, Separable Convolution, Convolution transpose.
CONV = 1;
// Fully connected
FC = 2;
}
optional Op op = 1 [default = CONV];
// Regularizer for the weights of the convolution op.
optional Regularizer regularizer = 2;
// Initializer for the weights of the convolution op.
optional Initializer initializer = 3;
// Type of activation to apply after convolution.
enum Activation {
// Use None (no activation)
NONE = 0;
// Use tf.nn.relu
RELU = 1;
// Use tf.nn.relu6
RELU_6 = 2;
}
optional Activation activation = 4 [default = RELU];
// BatchNorm hyperparameters. If this parameter is NOT set then BatchNorm is
// not applied!
optional BatchNorm batch_norm = 5;
}
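// A minimal example in config text format (values are illustrative, not taken
// from any shipped config):
//   op: CONV
//   regularizer { l2_regularizer { weight: 0.00004 } }
//   initializer { truncated_normal_initializer { stddev: 0.03 } }
//   activation: RELU_6
//   batch_norm { decay: 0.9997 epsilon: 0.001 }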
// Proto with one-of field for regularizers.
message Regularizer {
oneof regularizer_oneof {
L1Regularizer l1_regularizer = 1;
L2Regularizer l2_regularizer = 2;
}
}
// Configuration proto for L1 Regularizer.
// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l1_regularizer
message L1Regularizer {
optional float weight = 1 [default = 1.0];
}
// Configuration proto for L2 Regularizer.
// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l2_regularizer
message L2Regularizer {
optional float weight = 1 [default = 1.0];
}
// Proto with one-of field for initializers.
message Initializer {
oneof initializer_oneof {
TruncatedNormalInitializer truncated_normal_initializer = 1;
VarianceScalingInitializer variance_scaling_initializer = 2;
}
}
// Configuration proto for truncated normal initializer. See
// https://www.tensorflow.org/api_docs/python/tf/truncated_normal_initializer
message TruncatedNormalInitializer {
optional float mean = 1 [default = 0.0];
optional float stddev = 2 [default = 1.0];
}
// Configuration proto for variance scaling initializer. See
// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/
// variance_scaling_initializer
message VarianceScalingInitializer {
optional float factor = 1 [default = 2.0];
optional bool uniform = 2 [default = false];
enum Mode {
FAN_IN = 0;
FAN_OUT = 1;
FAN_AVG = 2;
}
optional Mode mode = 3 [default = FAN_IN];
}
// Configuration proto for batch norm to apply after convolution op. See
// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
message BatchNorm {
optional float decay = 1 [default = 0.999];
optional bool center = 2 [default = true];
optional bool scale = 3 [default = false];
optional float epsilon = 4 [default = 0.001];
// Whether to train the batch norm variables. If this is set to false during
// training, the current values of the batch_norm variables are used for the
// forward pass but they are never updated.
optional bool train = 5 [default = true];
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for image resizing operations.
// See builders/image_resizer_builder.py for details.
message ImageResizer {
oneof image_resizer_oneof {
KeepAspectRatioResizer keep_aspect_ratio_resizer = 1;
FixedShapeResizer fixed_shape_resizer = 2;
}
}
// Configuration proto for image resizer that keeps aspect ratio.
message KeepAspectRatioResizer {
// Desired size of the smaller image dimension in pixels.
optional int32 min_dimension = 1 [default = 600];
// Desired size of the larger image dimension in pixels.
optional int32 max_dimension = 2 [default = 1024];
}
// Configuration proto for image resizer that resizes to a fixed shape.
message FixedShapeResizer {
// Desired height of image in pixels.
optional int32 height = 1 [default = 300];
// Desired width of image in pixels.
optional int32 width = 2 [default = 300];
}
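// Illustrative choices (values match the defaults above, not any particular
// shipped config): an SSD-style pipeline would typically use
//   fixed_shape_resizer { height: 300 width: 300 }
// while a Faster R-CNN-style pipeline would keep the aspect ratio with
//   keep_aspect_ratio_resizer { min_dimension: 600 max_dimension: 1024 }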