Unverified commit 70255908, authored by pkulzc, committed by GitHub
Browse files

Object detection Internal Changes. (#4757)

* Merged commit includes the following changes:
204316992  by Zhichao Lu:

    Update docs to prepare inputs

--
204309254  by Zhichao Lu:

    Update running_pets.md to use new binaries and correct a few things in running_on_cloud.md

--
204306734  by Zhichao Lu:

    Move old binaries into legacy folder and add deprecation notice.

--
204267757  by Zhichao Lu:

    Fixing a problem in VRD evaluation with missing ground truth annotations for
    images that do not contain objects from 62 groundtruth classes.

--
204167430  by Zhichao Lu:

    This fixes a flaky losses test failure.

--
203670721  by Zhichao Lu:

    Internal change.

--
203569388  by Zhichao Lu:

    Internal change.

--
203546580  by Zhichao Lu:

    * Expand TPU compatibility g3doc with config snippets
    * Change mscoco dataset path in sample configs to the sharded versions

--
203325694  by Zhichao Lu:

    Make merge_multiple_label_boxes work for model_main code path.

--
203305655  by Zhichao Lu:

    Remove the 1x1 conv layer before pooling in MobileNet-v1-PPN feature extractor.

--
203139608  by Zhichao Lu:

    - Support exponential_decay with burnin learning rate schedule.
    - Add the minimum learning rate option.
    - Make the exponential decay start only after the burnin steps.

--
203068703  by Zhichao Lu:

    Modify create_coco_tf_record.py to output sharded files.

--
203025308  by Zhichao Lu:

    Add an option to share the prediction tower in WeightSharedBoxPredictor.

--
203024942  by Zhichao Lu:

    Move ssd mobilenet v1 ppn configs to third party.

--
202901259  by Zhichao Lu:

    Delete obsolete ssd mobilenet v1 focal loss configs and update pets dataset path

--
202894154  by Zhichao Lu:

    Move all TPU compatible ssd mobilenet v1 coco14/pet configs to third party.

--
202861774  by Zhichao Lu:

    Move Retinanet (SSD + FPN + Shared box predictor) configs to third_party.

--

PiperOrigin-RevId: 204316992

* Add original files back.
parent ee6fdda1
...@@ -46,10 +46,10 @@ import json ...@@ -46,10 +46,10 @@ import json
import os import os
import tensorflow as tf import tensorflow as tf
from object_detection import trainer
from object_detection.builders import dataset_builder from object_detection.builders import dataset_builder
from object_detection.builders import graph_rewriter_builder from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder from object_detection.builders import model_builder
from object_detection.legacy import trainer
from object_detection.utils import config_util from object_detection.utils import config_util
tf.logging.set_verbosity(tf.logging.INFO) tf.logging.set_verbosity(tf.logging.INFO)
...@@ -84,6 +84,7 @@ flags.DEFINE_string('model_config_path', '', ...@@ -84,6 +84,7 @@ flags.DEFINE_string('model_config_path', '',
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
@tf.contrib.framework.deprecated(None, 'Use object_detection/model_main.py.')
def main(_): def main(_):
assert FLAGS.train_dir, '`train_dir` is missing.' assert FLAGS.train_dir, '`train_dir` is missing.'
if FLAGS.task == 0: tf.gfile.MakeDirs(FLAGS.train_dir) if FLAGS.task == 0: tf.gfile.MakeDirs(FLAGS.train_dir)
......
...@@ -19,10 +19,10 @@ import tensorflow as tf ...@@ -19,10 +19,10 @@ import tensorflow as tf
from google.protobuf import text_format from google.protobuf import text_format
from object_detection import trainer
from object_detection.core import losses from object_detection.core import losses
from object_detection.core import model from object_detection.core import model
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.legacy import trainer
from object_detection.protos import train_pb2 from object_detection.protos import train_pb2
......
...@@ -36,8 +36,8 @@ import os ...@@ -36,8 +36,8 @@ import os
import re import re
import tensorflow as tf import tensorflow as tf
from object_detection import evaluator
from object_detection.core import standard_fields from object_detection.core import standard_fields
from object_detection.legacy import evaluator
from object_detection.metrics import tf_example_parser from object_detection.metrics import tf_example_parser
from object_detection.utils import config_util from object_detection.utils import config_util
from object_detection.utils import label_map_util from object_detection.utils import label_map_util
......
...@@ -223,3 +223,69 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None): ...@@ -223,3 +223,69 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
output_feature_map_keys.append('top_down_%s' % image_features[level][0]) output_feature_map_keys.append('top_down_%s' % image_features[level][0])
return collections.OrderedDict( return collections.OrderedDict(
reversed(zip(output_feature_map_keys, output_feature_maps_list))) reversed(zip(output_feature_map_keys, output_feature_maps_list)))
def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
                                 image_features):
  """Generates pooling pyramid feature maps.

  The pooling pyramid feature maps are motivated by
  multi_resolution_feature_maps. The main differences are that this generator
  is simpler and reduces the number of free parameters.

  More specifically:
   - Instead of using convolutions to shrink the feature map, it uses max
     pooling, therefore totally gets rid of the parameters in convolution.
   - By pooling feature from larger map up to a single cell, it generates
     features in the same feature space.
   - Instead of independently making box predictions from individual maps, it
     shares the same classifier across different feature maps, therefore reduces
     the "mis-calibration" across different scales.

  See go/ppn-detection for more details.

  Args:
    base_feature_map_depth: Depth of the base feature before the max pooling.
      When it is not positive, the 1x1 projection convolution is skipped and
      the incoming feature map is used as the base map unchanged.
    num_layers: Number of layers used to make predictions. They are pooled
      from the base feature.
    image_features: A dictionary of handles to activation tensors from the
      feature extractor. It must contain exactly one entry.

  Returns:
    feature_maps: an OrderedDict mapping keys (feature map names) to
      tensors where each tensor has shape [batch, height_i, width_i, depth_i].
      The first entry is the base map, followed by `num_layers - 1` maps that
      are each produced by a stride-2 max pooling of the previous one.

  Raises:
    ValueError: image_features does not contain exactly one entry
  """
  if len(image_features) != 1:
    raise ValueError('image_features should be a dictionary of length 1.')
  # `dict.keys()` is a non-indexable view on Python 3, so fetch the single
  # value through an iterator; this works on both Python 2 and Python 3.
  image_features = next(iter(image_features.values()))

  feature_map_keys = []
  feature_maps = []
  feature_map_key = 'Base_Conv2d_1x1_%d' % base_feature_map_depth
  if base_feature_map_depth > 0:
    image_features = slim.conv2d(
        image_features,
        base_feature_map_depth,
        [1, 1],  # kernel size
        padding='SAME', stride=1, scope=feature_map_key)
    # Add a 1x1 max-pooling node (a no op node) immediately after the conv2d for
    # TPU v1 compatibility. Without the following dummy op, TPU runtime
    # compiler will combine the convolution with one max-pooling below into a
    # single cycle, so getting the conv2d feature becomes impossible.
    image_features = slim.max_pool2d(
        image_features, [1, 1], padding='SAME', stride=1, scope=feature_map_key)
  # The base map is always emitted, whether or not it was projected above.
  feature_map_keys.append(feature_map_key)
  feature_maps.append(image_features)

  feature_map = image_features
  with slim.arg_scope([slim.max_pool2d], padding='SAME', stride=2):
    for i in range(num_layers - 1):
      feature_map_key = 'MaxPool2d_%d_2x2' % i
      feature_map = slim.max_pool2d(
          feature_map, [2, 2], padding='SAME', scope=feature_map_key)
      feature_map_keys.append(feature_map_key)
      feature_maps.append(feature_map)
  return collections.OrderedDict(zip(feature_map_keys, feature_maps))
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSD MobilenetV1 FPN Feature Extractor."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import mobilenet_v1
slim = tf.contrib.slim
class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
  """SSD feature extractor that stacks FPN layers on a MobilenetV1 backbone."""

  def preprocess(self, resized_inputs):
    """Rescales image pixels for the MobilenetV1 backbone.

    Applies `(2 / 255) * x - 1`, which maps values in [0, 255] to [-1, 1].

    Args:
      resized_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
    """
    pixel_scale = 2.0 / 255.0
    return pixel_scale * resized_inputs - 1.0

  def extract_features(self, preprocessed_inputs):
    """Builds the MobilenetV1 + FPN graph and returns five feature maps.

    Three maps come from the FPN top-down path over the backbone endpoints
    Conv2d_5/11/13_pointwise; two coarser maps are produced by stride-2 3x3
    convolutions applied successively to the coarsest FPN output.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    def scaled_depth(base_depth):
      """Applies the depth multiplier, never going below the minimum depth."""
      return max(int(base_depth * self._depth_multiplier), self._min_depth)

    backbone_endpoints = ['Conv2d_5_pointwise', 'Conv2d_11_pointwise',
                          'Conv2d_13_pointwise']

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      backbone_defaults = mobilenet_v1.mobilenet_v1_arg_scope(
          is_training=None, regularize_depthwise=True)
      with slim.arg_scope(backbone_defaults):
        # Selected while the MobilenetV1 arg scope is active, so
        # `_conv_hyperparams_fn` is invoked with that scope in effect.
        if self._override_base_feature_extractor_hyperparams:
          backbone_overrides = slim.arg_scope(self._conv_hyperparams_fn())
        else:
          backbone_overrides = context_manager.IdentityContextManager()
        with backbone_overrides:
          padded_inputs = ops.pad_to_multiple(
              preprocessed_inputs, self._pad_to_multiple)
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              padded_inputs,
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)

      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('fpn', reuse=self._reuse_weights):
          fpn_inputs = [(endpoint, image_features[endpoint])
                        for endpoint in backbone_endpoints]
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              fpn_inputs, depth=scaled_depth(256))

          # Each extra level halves the resolution of the level before it.
          coarse_features = {}
          previous_map = fpn_features['top_down_Conv2d_13_pointwise']
          for layer_index in (14, 15):
            layer_name = 'bottom_up_Conv2d_{}'.format(layer_index)
            previous_map = slim.conv2d(
                previous_map,
                num_outputs=scaled_depth(256),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope=layer_name)
            coarse_features[layer_name] = previous_map

    top_down_maps = [fpn_features['top_down_Conv2d_5_pointwise'],
                     fpn_features['top_down_Conv2d_11_pointwise'],
                     fpn_features['top_down_Conv2d_13_pointwise']]
    bottom_up_maps = [coarse_features['bottom_up_Conv2d_14'],
                      coarse_features['bottom_up_Conv2d_15']]
    return top_down_maps + bottom_up_maps
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobilenet_v1_fpn_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v1_fpn_feature_extractor
slim = tf.contrib.slim
class SsdMobilenetV1FpnFeatureExtractorTest(
    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
  """Shape, preprocessing and graph checks for the MobilenetV1 FPN extractor."""

  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                is_training=True, use_explicit_padding=False):
    """Builds the MobilenetV1 FPN feature extractor under test.

    Args:
      depth_multiplier: float depth multiplier for feature extractor
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      is_training: whether the network is in training mode.
      use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
        inputs so that the output dimensions are the same as if 'SAME' padding
        were used.

    Returns:
      an ssd_meta_arch.SSDFeatureExtractor object.
    """
    extractor_cls = (ssd_mobilenet_v1_fpn_feature_extractor.
                     SSDMobileNetV1FpnFeatureExtractor)
    return extractor_cls(
        is_training,
        depth_multiplier,
        32,  # min_depth
        pad_to_multiple,
        self.conv_hyperparams_fn,
        use_explicit_padding=use_explicit_padding)

  @staticmethod
  def _square_shapes(sizes, depth):
    """Returns [(2, s, s, depth), ...] for a batch of two square maps."""
    return [(2, size, size, depth) for size in sizes]

  def _check_with_and_without_explicit_padding(
      self, check_fn, image_size, depth_multiplier, pad_to_multiple,
      expected_shapes):
    """Runs `check_fn` on a square batch of two, padding off and then on."""
    for explicit_padding in (False, True):
      check_fn(
          2, image_size, image_size, depth_multiplier, pad_to_multiple,
          expected_shapes, use_explicit_padding=explicit_padding)

  def test_extract_features_returns_correct_shapes_256(self):
    self._check_with_and_without_explicit_padding(
        self.check_extract_features_returns_correct_shape,
        image_size=256,
        depth_multiplier=1.0,
        pad_to_multiple=1,
        expected_shapes=self._square_shapes((32, 16, 8, 4, 2), 256))

  def test_extract_features_returns_correct_shapes_384(self):
    # Despite the "384" in the name, this case feeds 320x320 images.
    self._check_with_and_without_explicit_padding(
        self.check_extract_features_returns_correct_shape,
        image_size=320,
        depth_multiplier=1.0,
        pad_to_multiple=1,
        expected_shapes=self._square_shapes((40, 20, 10, 5, 3), 256))

  def test_extract_features_with_dynamic_image_shape(self):
    self._check_with_and_without_explicit_padding(
        self.check_extract_features_returns_correct_shapes_with_dynamic_inputs,
        image_size=256,
        depth_multiplier=1.0,
        pad_to_multiple=1,
        expected_shapes=self._square_shapes((32, 16, 8, 4, 2), 256))

  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
    self._check_with_and_without_explicit_padding(
        self.check_extract_features_returns_correct_shape,
        image_size=299,
        depth_multiplier=1.0,
        pad_to_multiple=32,
        expected_shapes=self._square_shapes((40, 20, 10, 5, 3), 256))

  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
    self._check_with_and_without_explicit_padding(
        self.check_extract_features_returns_correct_shape,
        image_size=256,
        depth_multiplier=0.5**12,
        pad_to_multiple=1,
        expected_shapes=self._square_shapes((32, 16, 8, 4, 2), 32))

  def test_extract_features_raises_error_with_invalid_image_size(self):
    side = 32
    self.check_extract_features_raises_error_with_invalid_image_size(
        side, side, 1.0, 1)

  def test_preprocess_returns_correct_value_range(self):
    side = 256
    random_image = np.random.rand(2, side, side, 3)
    extractor = self._create_feature_extractor(1, 1)
    preprocessed = extractor.preprocess(random_image)
    within_unit_range = np.less_equal(np.abs(preprocessed), 1.0)
    self.assertTrue(np.all(within_unit_range))

  def test_variables_only_created_in_scope(self):
    self.check_feature_extractor_variables_under_scope(1, 1, 'MobilenetV1')

  def test_fused_batchnorm(self):
    side = 256
    image_placeholder = tf.placeholder(tf.float32, [1, side, side, 3])
    extractor = self._create_feature_extractor(1, 1)
    _ = extractor.extract_features(extractor.preprocess(image_placeholder))
    op_types = [op.type for op in tf.get_default_graph().get_operations()]
    self.assertTrue(any(op_type == 'FusedBatchNorm' for op_type in op_types))
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for MobilenetV1 PPN features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import mobilenet_v1
slim = tf.contrib.slim
class SSDMobileNetV1PpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
  """SSD Feature Extractor using MobilenetV1 PPN features."""

  def preprocess(self, resized_inputs):
    """SSD preprocessing.

    Maps pixel values to the range [-1, 1].

    Args:
      resized_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
    """
    return (2.0 / 255.0) * resized_inputs - 1.0

  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Runs the MobilenetV1 base network and feeds its 'Conv2d_11_pointwise'
    endpoint to the pooling pyramid generator, requesting six layers and no
    1x1 projection (base_feature_map_depth=0).

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=None, regularize_depthwise=True)):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams
              else context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
            base_feature_map_depth=0,
            num_layers=6,
            image_features={
                'image_features': image_features['Conv2d_11_pointwise']
            })
    # On Python 3 `dict.values()` is a view rather than a list; materialize it
    # so the return value is the list that the docstring promises.
    return list(feature_maps.values())
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobilenet_v1_ppn_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v1_ppn_feature_extractor
slim = tf.contrib.slim
class SsdMobilenetV1PpnFeatureExtractorTest(
    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
  """Shape, preprocessing and graph checks for the MobilenetV1 PPN extractor.

  The extractor under test always requests six pyramid layers, so every
  expected-shape list below has six entries.
  """

  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                is_training=True, use_explicit_padding=False):
    """Constructs a new feature extractor.

    Args:
      depth_multiplier: float depth multiplier for feature extractor
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      is_training: whether the network is in training mode.
      use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
        inputs so that the output dimensions are the same as if 'SAME' padding
        were used.

    Returns:
      an ssd_meta_arch.SSDFeatureExtractor object.
    """
    min_depth = 32
    return (ssd_mobilenet_v1_ppn_feature_extractor.
            SSDMobileNetV1PpnFeatureExtractor(
                is_training,
                depth_multiplier,
                min_depth,
                pad_to_multiple,
                self.conv_hyperparams_fn,
                use_explicit_padding=use_explicit_padding))

  def _check_with_and_without_explicit_padding(
      self, check_fn, image_height, image_width, depth_multiplier,
      pad_to_multiple, expected_feature_map_shape):
    """Runs `check_fn` on a batch of two, explicit padding off and then on."""
    for use_explicit_padding in (False, True):
      check_fn(
          2, image_height, image_width, depth_multiplier, pad_to_multiple,
          expected_feature_map_shape,
          use_explicit_padding=use_explicit_padding)

  def test_extract_features_returns_correct_shapes_320(self):
    image_height = 320
    image_width = 320
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 512),
                                  (2, 5, 5, 512), (2, 3, 3, 512),
                                  (2, 2, 2, 512), (2, 1, 1, 512)]
    self._check_with_and_without_explicit_padding(
        self.check_extract_features_returns_correct_shape,
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_300(self):
    image_height = 300
    image_width = 300
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(2, 19, 19, 512), (2, 10, 10, 512),
                                  (2, 5, 5, 512), (2, 3, 3, 512),
                                  (2, 2, 2, 512), (2, 1, 1, 512)]
    self._check_with_and_without_explicit_padding(
        self.check_extract_features_returns_correct_shape,
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_640(self):
    image_height = 640
    image_width = 640
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(2, 40, 40, 512), (2, 20, 20, 512),
                                  (2, 10, 10, 512), (2, 5, 5, 512),
                                  (2, 3, 3, 512), (2, 2, 2, 512)]
    self._check_with_and_without_explicit_padding(
        self.check_extract_features_returns_correct_shape,
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_with_dynamic_image_shape(self):
    image_height = 320
    image_width = 320
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 512),
                                  (2, 5, 5, 512), (2, 3, 3, 512),
                                  (2, 2, 2, 512), (2, 1, 1, 512)]
    # Both padding modes go through the dynamic-input check, so the explicit
    # padding path is exercised with dynamic inputs as well.
    self._check_with_and_without_explicit_padding(
        self.check_extract_features_returns_correct_shapes_with_dynamic_inputs,
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 1.0
    pad_to_multiple = 32
    # 299 is padded up to 320, so the pyramid matches the 320x320 case,
    # including the final 1x1 map.
    expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 512),
                                  (2, 5, 5, 512), (2, 3, 3, 512),
                                  (2, 2, 2, 512), (2, 1, 1, 512)]
    self._check_with_and_without_explicit_padding(
        self.check_extract_features_returns_correct_shape,
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
    image_height = 256
    image_width = 256
    depth_multiplier = 0.5**12
    pad_to_multiple = 1
    # Stride-2 'SAME' pooling of a 1x1 map stays 1x1, hence the repeated
    # final shape for the sixth layer.
    expected_feature_map_shape = [(2, 16, 16, 32), (2, 8, 8, 32),
                                  (2, 4, 4, 32), (2, 2, 2, 32),
                                  (2, 1, 1, 32), (2, 1, 1, 32)]
    self._check_with_and_without_explicit_padding(
        self.check_extract_features_returns_correct_shape,
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_raises_error_with_invalid_image_size(self):
    image_height = 32
    image_width = 32
    depth_multiplier = 1.0
    pad_to_multiple = 1
    self.check_extract_features_raises_error_with_invalid_image_size(
        image_height, image_width, depth_multiplier, pad_to_multiple)

  def test_preprocess_returns_correct_value_range(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1
    pad_to_multiple = 1
    test_image = np.random.rand(2, image_height, image_width, 3)
    feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                       pad_to_multiple)
    preprocessed_image = feature_extractor.preprocess(test_image)
    self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))

  def test_variables_only_created_in_scope(self):
    depth_multiplier = 1
    pad_to_multiple = 1
    scope_name = 'MobilenetV1'
    self.check_feature_extractor_variables_under_scope(
        depth_multiplier, pad_to_multiple, scope_name)

  def test_has_fused_batchnorm(self):
    image_height = 320
    image_width = 320
    depth_multiplier = 1
    pad_to_multiple = 1
    image_placeholder = tf.placeholder(tf.float32,
                                       [1, image_height, image_width, 3])
    feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                       pad_to_multiple)
    preprocessed_image = feature_extractor.preprocess(image_placeholder)
    _ = feature_extractor.extract_features(preprocessed_image)
    self.assertTrue(any(op.type == 'FusedBatchNorm'
                        for op in tf.get_default_graph().get_operations()))
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSD feature extractors based on Resnet v1 and PPN architectures."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import resnet_v1
slim = tf.contrib.slim
class _SSDResnetPpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD feature extractor based on resnet architecture and PPN."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams_fn,
resnet_base_fn,
resnet_scope_name,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
base_feature_map_depth=1024,
num_layers=6,
override_base_feature_extractor_hyperparams=False,
use_bounded_activations=False):
"""Resnet based PPN Feature Extractor for SSD Models.
See go/pooling-pyramid for more details about PPN.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
resnet_base_fn: base resnet network to use.
resnet_scope_name: scope name to construct resnet
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
base_feature_map_depth: Depth of the base feature before the max pooling.
num_layers: Number of layers used to make predictions. They are pooled
from the base feature.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
use_bounded_activations: Whether or not to use bounded activations for
resnet v1 bottleneck residual unit. Bounded activations better lend
themselves to quantized inference.
"""
super(_SSDResnetPpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
self._resnet_base_fn = resnet_base_fn
self._resnet_scope_name = resnet_scope_name
self._base_feature_map_depth = base_feature_map_depth
self._num_layers = num_layers
self._use_bounded_activations = use_bounded_activations
def _filter_features(self, image_features):
# TODO(rathodv): Change resnet endpoint to strip scope prefixes instead
# of munging the scope here.
filtered_image_features = dict({})
for key, feature in image_features.items():
feature_name = key.split('/')[-1]
if feature_name in ['block2', 'block3', 'block4']:
filtered_image_features[feature_name] = feature
return filtered_image_features
def preprocess(self, resized_inputs):
"""SSD preprocessing.
VGG style channel mean subtraction as described here:
https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-mdnge.
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Runs the configured resnet base function on the padded inputs and builds
  the pooling pyramid feature maps from its 'block3' activations.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]

  Raises:
    ValueError: depth multiplier is not supported.
  """
  if self._depth_multiplier != 1.0:
    raise ValueError('Depth multiplier not supported.')

  # NOTE(review): presumably rejects inputs whose height or width is below
  # 129 -- confirm against shape_utils.check_min_image_dim.
  preprocessed_inputs = shape_utils.check_min_image_dim(
      129, preprocessed_inputs)

  with tf.variable_scope(
      self._resnet_scope_name, reuse=self._reuse_weights) as scope:
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
      # The base network only runs under `conv_hyperparams_fn` when the
      # override flag is set; otherwise IdentityContextManager is entered
      # instead (presumably a no-op -- confirm).
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams else
            context_manager.IdentityContextManager()):
        with slim.arg_scope(
            [resnet_v1.bottleneck],
            use_bounded_activations=self._use_bounded_activations):
          _, activations = self._resnet_base_fn(
              inputs=ops.pad_to_multiple(preprocessed_inputs,
                                         self._pad_to_multiple),
              num_classes=None,
              is_training=None,
              global_pool=False,
              output_stride=None,
              store_non_strided_activations=True,
              scope=scope)

    # NOTE(review): kept inside the variable scope so that anything the
    # pyramid creates is namespaced together with the base network --
    # confirm the nesting against the upstream file.
    with slim.arg_scope(self._conv_hyperparams_fn()):
      feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
          base_feature_map_depth=self._base_feature_map_depth,
          num_layers=self._num_layers,
          image_features={
              'image_features': self._filter_features(activations)['block3']
          })

  # On Python 3 `dict.values()` is a view, not a list; materialize it so the
  # return value matches the documented list contract (indexable, len()).
  return list(feature_maps.values())
class SSDResnet50V1PpnFeatureExtractor(_SSDResnetPpnFeatureExtractor):
  """SSD PPN feature extractor that uses Resnet50 v1 as its base network."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams_fn,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False,
               override_base_feature_extractor_hyperparams=False):
    """Sets up the PPN extractor with `resnet_v1.resnet_v1_50` as base.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams_fn: A function to construct tf slim arg_scope for
        conv2d and separable_conv2d ops in the layers that are added on top
        of the base feature extractor.
      reuse_weights: Whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False.
      use_depthwise: Whether to use depthwise convolutions. Default is False.
      override_base_feature_extractor_hyperparams: Whether to override
        hyperparameters of the base feature extractor with the one from
        `conv_hyperparams_fn`.
    """
    # The base network function and the scope name are the only things that
    # distinguish this variant; everything else is forwarded unchanged.
    resnet_base_fn = resnet_v1.resnet_v1_50
    resnet_scope_name = 'resnet_v1_50'
    super(SSDResnet50V1PpnFeatureExtractor, self).__init__(
        is_training,
        depth_multiplier,
        min_depth,
        pad_to_multiple,
        conv_hyperparams_fn,
        resnet_base_fn,
        resnet_scope_name,
        reuse_weights,
        use_explicit_padding,
        use_depthwise,
        override_base_feature_extractor_hyperparams=(
            override_base_feature_extractor_hyperparams))
class SSDResnet101V1PpnFeatureExtractor(_SSDResnetPpnFeatureExtractor):
  """SSD PPN feature extractor that uses Resnet101 v1 as its base network."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams_fn,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False,
               override_base_feature_extractor_hyperparams=False):
    """Sets up the PPN extractor with `resnet_v1.resnet_v1_101` as base.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams_fn: A function to construct tf slim arg_scope for
        conv2d and separable_conv2d ops in the layers that are added on top
        of the base feature extractor.
      reuse_weights: Whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False.
      use_depthwise: Whether to use depthwise convolutions. Default is False.
      override_base_feature_extractor_hyperparams: Whether to override
        hyperparameters of the base feature extractor with the one from
        `conv_hyperparams_fn`.
    """
    # The base network function and the scope name are the only things that
    # distinguish this variant; everything else is forwarded unchanged.
    resnet_base_fn = resnet_v1.resnet_v1_101
    resnet_scope_name = 'resnet_v1_101'
    super(SSDResnet101V1PpnFeatureExtractor, self).__init__(
        is_training,
        depth_multiplier,
        min_depth,
        pad_to_multiple,
        conv_hyperparams_fn,
        resnet_base_fn,
        resnet_scope_name,
        reuse_weights,
        use_explicit_padding,
        use_depthwise,
        override_base_feature_extractor_hyperparams=(
            override_base_feature_extractor_hyperparams))
class SSDResnet152V1PpnFeatureExtractor(_SSDResnetPpnFeatureExtractor):
  """SSD PPN feature extractor that uses Resnet152 v1 as its base network."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams_fn,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False,
               override_base_feature_extractor_hyperparams=False):
    """Sets up the PPN extractor with `resnet_v1.resnet_v1_152` as base.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams_fn: A function to construct tf slim arg_scope for
        conv2d and separable_conv2d ops in the layers that are added on top
        of the base feature extractor.
      reuse_weights: Whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False.
      use_depthwise: Whether to use depthwise convolutions. Default is False.
      override_base_feature_extractor_hyperparams: Whether to override
        hyperparameters of the base feature extractor with the one from
        `conv_hyperparams_fn`.
    """
    # The base network function and the scope name are the only things that
    # distinguish this variant; everything else is forwarded unchanged.
    resnet_base_fn = resnet_v1.resnet_v1_152
    resnet_scope_name = 'resnet_v1_152'
    super(SSDResnet152V1PpnFeatureExtractor, self).__init__(
        is_training,
        depth_multiplier,
        min_depth,
        pad_to_multiple,
        conv_hyperparams_fn,
        resnet_base_fn,
        resnet_scope_name,
        reuse_weights,
        use_explicit_padding,
        use_depthwise,
        override_base_feature_extractor_hyperparams=(
            override_base_feature_extractor_hyperparams))
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for SSD ResNet v1 PPN feature extractors."""
import tensorflow as tf
from object_detection.models import ssd_resnet_v1_ppn_feature_extractor
from object_detection.models import ssd_resnet_v1_ppn_feature_extractor_testbase
class SSDResnet50V1PpnFeatureExtractorTest(
    ssd_resnet_v1_ppn_feature_extractor_testbase.
    SSDResnetPpnFeatureExtractorTestBase):
  """Runs the shared PPN test cases against the Resnet50 v1 extractor."""

  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                use_explicit_padding=False):
    """Builds a training-mode Resnet50 v1 PPN extractor with min_depth 32."""
    extractor_cls = (
        ssd_resnet_v1_ppn_feature_extractor.SSDResnet50V1PpnFeatureExtractor)
    return extractor_cls(
        is_training=True,
        depth_multiplier=depth_multiplier,
        min_depth=32,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams_fn=self.conv_hyperparams_fn,
        use_explicit_padding=use_explicit_padding)

  def _scope_name(self):
    """Returns the scope name handed to the variable-scope check."""
    return 'resnet_v1_50'
class SSDResnet101V1PpnFeatureExtractorTest(
    ssd_resnet_v1_ppn_feature_extractor_testbase.
    SSDResnetPpnFeatureExtractorTestBase):
  """Runs the shared PPN test cases against the Resnet101 v1 extractor."""

  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                use_explicit_padding=False):
    """Builds a training-mode Resnet101 v1 PPN extractor with min_depth 32."""
    extractor_cls = (
        ssd_resnet_v1_ppn_feature_extractor.SSDResnet101V1PpnFeatureExtractor)
    return extractor_cls(
        is_training=True,
        depth_multiplier=depth_multiplier,
        min_depth=32,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams_fn=self.conv_hyperparams_fn,
        use_explicit_padding=use_explicit_padding)

  def _scope_name(self):
    """Returns the scope name handed to the variable-scope check."""
    return 'resnet_v1_101'
class SSDResnet152V1PpnFeatureExtractorTest(
    ssd_resnet_v1_ppn_feature_extractor_testbase.
    SSDResnetPpnFeatureExtractorTestBase):
  """Runs the shared PPN test cases against the Resnet152 v1 extractor."""

  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                use_explicit_padding=False):
    """Builds a training-mode Resnet152 v1 PPN extractor with min_depth 32."""
    extractor_cls = (
        ssd_resnet_v1_ppn_feature_extractor.SSDResnet152V1PpnFeatureExtractor)
    return extractor_cls(
        is_training=True,
        depth_multiplier=depth_multiplier,
        min_depth=32,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams_fn=self.conv_hyperparams_fn,
        use_explicit_padding=use_explicit_padding)

  def _scope_name(self):
    """Returns the scope name handed to the variable-scope check."""
    return 'resnet_v1_152'
# Only launch the TensorFlow test runner when executed as a script, not when
# this module is imported.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Shared test base class for SSD ResNet v1 PPN feature extractors."""
import abc
import numpy as np
from object_detection.models import ssd_feature_extractor_test
class SSDResnetPpnFeatureExtractorTestBase(
    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
  """Shared test cases for the SSD Resnet PPN feature extractors.

  Subclasses provide `_create_feature_extractor` (called by the preprocess
  test below) and `_scope_name`.
  """

  @abc.abstractmethod
  def _scope_name(self):
    """Returns the scope name passed to the variables-under-scope check."""
    pass

  def test_extract_features_returns_correct_shapes_289(self):
    """Checks the six expected output shapes for 289x289 inputs."""
    batch_size = 2
    image_size = 289
    # One square map per pyramid level, all with depth 1024.
    expected_shapes = [
        (batch_size, side, side, 1024) for side in (19, 10, 5, 3, 2, 1)
    ]
    self.check_extract_features_returns_correct_shape(
        batch_size, image_size, image_size, 1.0, 1, expected_shapes)

  def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
    """Same shape expectations as above, via the dynamic-input checker."""
    batch_size = 2
    image_size = 289
    expected_shapes = [
        (batch_size, side, side, 1024) for side in (19, 10, 5, 3, 2, 1)
    ]
    self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
        batch_size, image_size, image_size, 1.0, 1, expected_shapes)

  def test_extract_features_raises_error_with_invalid_image_size(self):
    """Runs the invalid-image-size check with a 32x32 image."""
    image_size = 32
    self.check_extract_features_raises_error_with_invalid_image_size(
        image_size, image_size, 1.0, 1)

  def test_preprocess_returns_correct_value_range(self):
    """Checks that preprocess subtracts the fixed per-channel means."""
    image_size = 128
    random_images = np.random.rand(4, image_size, image_size, 3)
    extractor = self._create_feature_extractor(1, 1)
    actual = extractor.preprocess(random_images)
    expected = random_images - [[123.68, 116.779, 103.939]]
    self.assertAllClose(actual, expected)

  def test_variables_only_created_in_scope(self):
    """Runs the variables-under-scope check with the subclass scope name."""
    self.check_feature_extractor_variables_under_scope(
        1, 1, self._scope_name())
...@@ -4,14 +4,14 @@ package object_detection.protos; ...@@ -4,14 +4,14 @@ package object_detection.protos;
import "object_detection/protos/hyperparams.proto"; import "object_detection/protos/hyperparams.proto";
// Configuration proto for box predictor. See core/box_predictor.py for details. // Configuration proto for box predictor. See core/box_predictor.py for details.
message BoxPredictor { message BoxPredictor {
oneof box_predictor_oneof { oneof box_predictor_oneof {
ConvolutionalBoxPredictor convolutional_box_predictor = 1; ConvolutionalBoxPredictor convolutional_box_predictor = 1;
MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2; MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2;
RfcnBoxPredictor rfcn_box_predictor = 3; RfcnBoxPredictor rfcn_box_predictor = 3;
WeightSharedConvolutionalBoxPredictor weight_shared_convolutional_box_predictor = 4; WeightSharedConvolutionalBoxPredictor
weight_shared_convolutional_box_predictor = 4;
} }
} }
...@@ -82,11 +82,15 @@ message WeightSharedConvolutionalBoxPredictor { ...@@ -82,11 +82,15 @@ message WeightSharedConvolutionalBoxPredictor {
// https://arxiv.org/abs/1708.02002 for details. // https://arxiv.org/abs/1708.02002 for details.
optional float class_prediction_bias_init = 10 [default = 0.0]; optional float class_prediction_bias_init = 10 [default = 0.0];
// Whether to use dropout for class prediction. // Whether to use dropout for class prediction.
optional bool use_dropout = 11 [default = false]; optional bool use_dropout = 11 [default = false];
// Keep probability for dropout // Keep probability for dropout
optional float dropout_keep_probability = 12 [default = 0.8]; optional float dropout_keep_probability = 12 [default = 0.8];
// Whether to share the multi-layer tower between box prediction and class
// prediction heads.
optional bool share_prediction_tower = 13 [default = false];
} }
message MaskRCNNBoxPredictor { message MaskRCNNBoxPredictor {
...@@ -94,7 +98,7 @@ message MaskRCNNBoxPredictor { ...@@ -94,7 +98,7 @@ message MaskRCNNBoxPredictor {
optional Hyperparams fc_hyperparams = 1; optional Hyperparams fc_hyperparams = 1;
// Whether to use a dropout op prior to both the box and class predictions. // Whether to use a dropout op prior to both the box and class predictions.
optional bool use_dropout = 2 [default= false]; optional bool use_dropout = 2 [default = false];
// Keep probability for dropout. This is only used if use_dropout is true. // Keep probability for dropout. This is only used if use_dropout is true.
optional float dropout_keep_probability = 3 [default = 0.5]; optional float dropout_keep_probability = 3 [default = 0.5];
...@@ -141,13 +145,13 @@ message RfcnBoxPredictor { ...@@ -141,13 +145,13 @@ message RfcnBoxPredictor {
optional int32 num_spatial_bins_width = 3 [default = 3]; optional int32 num_spatial_bins_width = 3 [default = 3];
// Target depth to reduce the input image features to. // Target depth to reduce the input image features to.
optional int32 depth = 4 [default=1024]; optional int32 depth = 4 [default = 1024];
// Size of the encoding for the boxes. // Size of the encoding for the boxes.
optional int32 box_code_size = 5 [default = 4]; optional int32 box_code_size = 5 [default = 4];
// Size to resize the rfcn crops to. // Size to resize the rfcn crops to.
optional int32 crop_height = 6 [default= 12]; optional int32 crop_height = 6 [default = 12];
optional int32 crop_width = 7 [default=12]; optional int32 crop_width = 7 [default = 12];
} }
...@@ -61,6 +61,9 @@ message ExponentialDecayLearningRate { ...@@ -61,6 +61,9 @@ message ExponentialDecayLearningRate {
optional uint32 decay_steps = 2 [default = 4000000]; optional uint32 decay_steps = 2 [default = 4000000];
optional float decay_factor = 3 [default = 0.95]; optional float decay_factor = 3 [default = 0.95];
optional bool staircase = 4 [default = true]; optional bool staircase = 4 [default = true];
optional float burnin_learning_rate = 5 [default = 0.0];
optional uint32 burnin_steps = 6 [default = 0];
optional float min_learning_rate = 7 [default = 0.0];
} }
// Configuration message for a manually defined learning rate schedule. // Configuration message for a manually defined learning rate schedule.
......
trainingInput: trainingInput:
runtimeVersion: "1.0" runtimeVersion: "1.8"
scaleTier: CUSTOM scaleTier: CUSTOM
masterType: standard_gpu masterType: standard_gpu
workerCount: 5 workerCount: 5
......
...@@ -166,7 +166,7 @@ train_config: { ...@@ -166,7 +166,7 @@ train_config: {
train_input_reader: { train_input_reader: {
tf_record_input_reader { tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
} }
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
} }
...@@ -178,7 +178,7 @@ eval_config: { ...@@ -178,7 +178,7 @@ eval_config: {
eval_input_reader: { eval_input_reader: {
tf_record_input_reader { tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
} }
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false shuffle: false
......
...@@ -120,7 +120,7 @@ train_config: { ...@@ -120,7 +120,7 @@ train_config: {
train_input_reader: { train_input_reader: {
tf_record_input_reader { tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
} }
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
} }
...@@ -134,7 +134,7 @@ eval_config: { ...@@ -134,7 +134,7 @@ eval_config: {
eval_input_reader: { eval_input_reader: {
tf_record_input_reader { tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
} }
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false shuffle: false
......
...@@ -110,7 +110,7 @@ train_config: { ...@@ -110,7 +110,7 @@ train_config: {
train_input_reader: { train_input_reader: {
tf_record_input_reader { tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
} }
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
} }
...@@ -124,7 +124,7 @@ eval_config: { ...@@ -124,7 +124,7 @@ eval_config: {
eval_input_reader: { eval_input_reader: {
tf_record_input_reader { tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
} }
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false shuffle: false
......
...@@ -121,7 +121,7 @@ train_config: { ...@@ -121,7 +121,7 @@ train_config: {
train_input_reader: { train_input_reader: {
tf_record_input_reader { tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_faces_train.record-?????-of-?????" input_path: "PATH_TO_BE_CONFIGURED/pet_faces_train.record-?????-of-00010"
} }
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
} }
...@@ -133,7 +133,7 @@ eval_config: { ...@@ -133,7 +133,7 @@ eval_config: {
eval_input_reader: { eval_input_reader: {
tf_record_input_reader { tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_faces_val.record-?????-of-?????" input_path: "PATH_TO_BE_CONFIGURED/pet_faces_val.record-?????-of-00010"
} }
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
shuffle: false shuffle: false
......
...@@ -119,7 +119,7 @@ train_config: { ...@@ -119,7 +119,7 @@ train_config: {
train_input_reader: { train_input_reader: {
tf_record_input_reader { tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
} }
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
} }
...@@ -133,7 +133,7 @@ eval_config: { ...@@ -133,7 +133,7 @@ eval_config: {
eval_input_reader: { eval_input_reader: {
tf_record_input_reader { tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
} }
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false shuffle: false
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment