Commit b3247557 authored by Dheera Venkatraman's avatar Dheera Venkatraman
Browse files

add flag for saving images to summary; strings moved to common.py'

parents 75c931fd 2041d5ca
......@@ -44,7 +44,7 @@ class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase):
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [4, 7, 7, 1024])
self.assertAllEqual(features_shape_out, [4, 14, 14, 512])
def test_extract_proposal_features_stride_eight(self):
feature_extractor = self._build_feature_extractor(
......@@ -59,7 +59,7 @@ class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase):
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [4, 7, 7, 1024])
self.assertAllEqual(features_shape_out, [4, 14, 14, 512])
def test_extract_proposal_features_half_size_input(self):
feature_extractor = self._build_feature_extractor(
......@@ -74,7 +74,7 @@ class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase):
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [1, 4, 4, 1024])
self.assertAllEqual(features_shape_out, [1, 7, 7, 512])
def test_extract_proposal_features_dies_on_invalid_stride(self):
with self.assertRaises(ValueError):
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""PNASNet Faster R-CNN implementation.
Based on PNASNet model: https://arxiv.org/abs/1712.00559
"""
import tensorflow as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch
from nets.nasnet import nasnet_utils
from nets.nasnet import pnasnet
arg_scope = tf.contrib.framework.arg_scope
slim = tf.contrib.slim
def pnasnet_large_arg_scope_for_detection(is_batch_norm_training=False):
"""Defines the default arg scope for the PNASNet Large for object detection.
This provides a small edit to switch batch norm training on and off.
Args:
is_batch_norm_training: Boolean indicating whether to train with batch norm.
Returns:
An `arg_scope` to use for the PNASNet Large Model.
"""
imagenet_scope = pnasnet.pnasnet_large_arg_scope()
with arg_scope(imagenet_scope):
with arg_scope([slim.batch_norm], is_training=is_batch_norm_training) as sc:
return sc
def _filter_scaling(reduction_indices, start_cell_num):
"""Compute the expected filter scaling at given PNASNet cell start_cell_num.
In the pnasnet.py code, filter_scaling starts at 1.0. We instead
adapt filter scaling to depend on the starting cell.
At first cells, before any reduction, filter_scalling is 1.0. With passing
any reduction cell, the filter_scaling is multiplied by 2.
Args:
reduction_indices: list of int indices.
start_cell_num: int.
Returns:
filter_scaling: float.
"""
filter_scaling = 1.0
for ind in reduction_indices:
if ind < start_cell_num:
filter_scaling *= 2.0
return filter_scaling
# Note: This is largely a copy of _build_pnasnet_base inside pnasnet.py but
# with special edits to remove instantiation of the stem and the special
# ability to receive as input a pair of hidden states. It constructs only
# a sub-network from the original PNASNet model, starting from the
# start_cell_num cell and with modified final layer.
def _build_pnasnet_base(
hidden_previous, hidden, normal_cell, hparams, true_cell_num,
start_cell_num):
"""Constructs a PNASNet image model for proposal classifier features."""
# Find where to place the reduction cells or stride normal cells
reduction_indices = nasnet_utils.calc_reduction_layers(
hparams.num_cells, hparams.num_reduction_layers)
filter_scaling = _filter_scaling(reduction_indices, start_cell_num)
# Note: The None is prepended to match the behavior of _imagenet_stem()
cell_outputs = [None, hidden_previous, hidden]
net = hidden
# Run the cells
for cell_num in range(start_cell_num, hparams.num_cells):
is_reduction = cell_num in reduction_indices
stride = 2 if is_reduction else 1
if is_reduction: filter_scaling *= hparams.filter_scaling_rate
prev_layer = cell_outputs[-2]
net = normal_cell(
net,
scope='cell_{}'.format(cell_num),
filter_scaling=filter_scaling,
stride=stride,
prev_layer=prev_layer,
cell_num=true_cell_num)
true_cell_num += 1
cell_outputs.append(net)
# Final nonlinearity.
# Note that we have dropped the final pooling, dropout and softmax layers
# from the default pnasnet version.
with tf.variable_scope('final_layer'):
net = tf.nn.relu(net)
return net
# TODO(shlens): Only fixed_shape_resizer is currently supported for PNASNet
# featurization. The reason for this is that pnasnet.py only supports
# inputs with fully known shapes. We need to update pnasnet.py to handle
# shapes not known at compile time.
class FasterRCNNPNASFeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
"""Faster R-CNN with PNASNet feature extractor implementation."""
def __init__(self,
is_training,
first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None,
weight_decay=0.0):
"""Constructor.
Args:
is_training: See base class.
first_stage_features_stride: See base class.
batch_norm_trainable: See base class.
reuse_weights: See base class.
weight_decay: See base class.
Raises:
ValueError: If `first_stage_features_stride` is not 16.
"""
if first_stage_features_stride != 16:
raise ValueError('`first_stage_features_stride` must be 16.')
super(FasterRCNNPNASFeatureExtractor, self).__init__(
is_training, first_stage_features_stride, batch_norm_trainable,
reuse_weights, weight_decay)
def preprocess(self, resized_inputs):
"""Faster R-CNN with PNAS preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
representing a batch of images with values between 0 and 255.0.
Returns:
preprocessed_inputs: A [batch, height_out, width_out, channels] float32
tensor representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_proposal_features(self, preprocessed_inputs, scope):
"""Extracts first stage RPN features.
Extracts features using the first half of the PNASNet network.
We construct the network in `align_feature_maps=True` mode, which means
that all VALID paddings in the network are changed to SAME padding so that
the feature maps are aligned.
Args:
preprocessed_inputs: A [batch, height, width, channels] float32 tensor
representing a batch of images.
scope: A scope name.
Returns:
rpn_feature_map: A tensor with shape [batch, height, width, depth]
end_points: A dictionary mapping feature extractor tensor names to tensors
Raises:
ValueError: If the created network is missing the required activation.
"""
del scope
if len(preprocessed_inputs.get_shape().as_list()) != 4:
raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a '
'tensor of shape %s' % preprocessed_inputs.get_shape())
with slim.arg_scope(pnasnet_large_arg_scope_for_detection(
is_batch_norm_training=self._train_batch_norm)):
with arg_scope([slim.conv2d,
slim.batch_norm,
slim.separable_conv2d],
reuse=self._reuse_weights):
_, end_points = pnasnet.build_pnasnet_large(
preprocessed_inputs, num_classes=None,
is_training=self._is_training,
final_endpoint='Cell_7')
# Note that both 'Cell_6' and 'Cell_7' have equal depth = 2160.
# Cell_7 is the last cell before second reduction.
rpn_feature_map = tf.concat([end_points['Cell_6'],
end_points['Cell_7']], 3)
# pnasnet.py does not maintain the batch size in the first dimension.
# This work around permits us retaining the batch for below.
batch = preprocessed_inputs.get_shape().as_list()[0]
shape_without_batch = rpn_feature_map.get_shape().as_list()[1:]
rpn_feature_map_shape = [batch] + shape_without_batch
rpn_feature_map.set_shape(rpn_feature_map_shape)
return rpn_feature_map, end_points
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
"""Extracts second stage box classifier features.
This function reconstructs the "second half" of the PNASNet
network after the part defined in `_extract_proposal_features`.
Args:
proposal_feature_maps: A 4-D float tensor with shape
[batch_size * self.max_num_proposals, crop_height, crop_width, depth]
representing the feature map cropped to each proposal.
scope: A scope name.
Returns:
proposal_classifier_features: A 4-D float tensor with shape
[batch_size * self.max_num_proposals, height, width, depth]
representing box classifier features for each proposal.
"""
del scope
# Number of used stem cells.
num_stem_cells = 2
# Note that we always feed into 2 layers of equal depth
# where the first N channels corresponds to previous hidden layer
# and the second N channels correspond to the final hidden layer.
hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3)
# Note that what follows is largely a copy of build_pnasnet_large() within
# pnasnet.py. We are copying to minimize code pollution in slim.
# TODO(shlens,skornblith): Determine the appropriate drop path schedule.
# For now the schedule is the default (1.0->0.7 over 250,000 train steps).
hparams = pnasnet.large_imagenet_config()
if not self._is_training:
hparams.set_hparam('drop_path_keep_prob', 1.0)
# Calculate the total number of cells in the network
total_num_cells = hparams.num_cells + num_stem_cells
normal_cell = pnasnet.PNasNetNormalCell(
hparams.num_conv_filters, hparams.drop_path_keep_prob,
total_num_cells, hparams.total_training_steps)
with arg_scope([slim.dropout, nasnet_utils.drop_path],
is_training=self._is_training):
with arg_scope([slim.batch_norm], is_training=self._train_batch_norm):
with arg_scope([slim.avg_pool2d,
slim.max_pool2d,
slim.conv2d,
slim.batch_norm,
slim.separable_conv2d,
nasnet_utils.factorized_reduction,
nasnet_utils.global_avg_pool,
nasnet_utils.get_channel_index,
nasnet_utils.get_channel_dim],
data_format=hparams.data_format):
# This corresponds to the cell number just past 'Cell_7' used by
# _extract_proposal_features().
start_cell_num = 8
true_cell_num = start_cell_num + num_stem_cells
with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()):
net = _build_pnasnet_base(
hidden_previous,
hidden,
normal_cell=normal_cell,
hparams=hparams,
true_cell_num=true_cell_num,
start_cell_num=start_cell_num)
proposal_classifier_features = net
return proposal_classifier_features
def restore_from_classification_checkpoint_fn(
self,
first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Note that this overrides the default implementation in
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
PNASNet checkpoints.
Args:
first_stage_feature_extractor_scope: A scope name for the first stage
feature extractor.
second_stage_feature_extractor_scope: A scope name for the second stage
feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in tf.global_variables():
if variable.op.name.startswith(
first_stage_feature_extractor_scope):
var_name = variable.op.name.replace(
first_stage_feature_extractor_scope + '/', '')
var_name += '/ExponentialMovingAverage'
variables_to_restore[var_name] = variable
if variable.op.name.startswith(
second_stage_feature_extractor_scope):
var_name = variable.op.name.replace(
second_stage_feature_extractor_scope + '/', '')
var_name += '/ExponentialMovingAverage'
variables_to_restore[var_name] = variable
return variables_to_restore
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for models.faster_rcnn_pnas_feature_extractor."""
import tensorflow as tf
from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
class FasterRcnnPNASFeatureExtractorTest(tf.test.TestCase):
def _build_feature_extractor(self, first_stage_features_stride):
return frcnn_pnas.FasterRCNNPNASFeatureExtractor(
is_training=False,
first_stage_features_stride=first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None,
weight_decay=0.0)
def test_extract_proposal_features_returns_expected_size(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
preprocessed_inputs = tf.random_uniform(
[1, 299, 299, 3], maxval=255, dtype=tf.float32)
rpn_feature_map, _ = feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
features_shape = tf.shape(rpn_feature_map)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [1, 19, 19, 4320])
def test_extract_proposal_features_input_size_224(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
preprocessed_inputs = tf.random_uniform(
[1, 224, 224, 3], maxval=255, dtype=tf.float32)
rpn_feature_map, _ = feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
features_shape = tf.shape(rpn_feature_map)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [1, 14, 14, 4320])
def test_extract_proposal_features_input_size_112(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
preprocessed_inputs = tf.random_uniform(
[1, 112, 112, 3], maxval=255, dtype=tf.float32)
rpn_feature_map, _ = feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
features_shape = tf.shape(rpn_feature_map)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [1, 7, 7, 4320])
def test_extract_proposal_features_dies_on_invalid_stride(self):
with self.assertRaises(ValueError):
self._build_feature_extractor(first_stage_features_stride=99)
def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
preprocessed_inputs = tf.random_uniform(
[224, 224, 3], maxval=255, dtype=tf.float32)
with self.assertRaises(ValueError):
feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
def test_extract_box_classifier_features_returns_expected_size(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
proposal_feature_maps = tf.random_uniform(
[2, 17, 17, 1088], maxval=255, dtype=tf.float32)
proposal_classifier_features = (
feature_extractor.extract_box_classifier_features(
proposal_feature_maps, scope='TestScope'))
features_shape = tf.shape(proposal_classifier_features)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [2, 9, 9, 4320])
def test_filter_scaling_computation(self):
expected_filter_scaling = {
((4, 8), 2): 1.0,
((4, 8), 7): 2.0,
((4, 8), 8): 2.0,
((4, 8), 9): 4.0
}
for args, filter_scaling in expected_filter_scaling.items():
reduction_indices, start_cell_num = args
self.assertAlmostEqual(
frcnn_pnas._filter_scaling(reduction_indices, start_cell_num),
filter_scaling)
if __name__ == '__main__':
tf.test.main()
......@@ -37,7 +37,8 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""InceptionV2 Feature Extractor for SSD Models.
Args:
......@@ -55,11 +56,16 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDInceptionV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -76,7 +82,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......
......@@ -37,7 +37,8 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""InceptionV3 Feature Extractor for SSD Models.
Args:
......@@ -55,11 +56,16 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDInceptionV3FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -76,7 +82,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......
......@@ -38,7 +38,8 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""MobileNetV1 Feature Extractor for SSD Models.
Args:
......@@ -57,11 +58,16 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -78,7 +84,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......
......@@ -39,7 +39,8 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""MobileNetV2 Feature Extractor for SSD Models.
Mobilenet v2 (experimental), designed by sandler@. More details can be found
......@@ -60,11 +61,16 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDMobileNetV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -81,7 +87,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......
......@@ -43,7 +43,8 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""SSD FPN feature extractor based on Resnet v1 architecture.
Args:
......@@ -66,6 +67,11 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
Raises:
ValueError: On supplying invalid arguments for unused arguments.
......@@ -73,7 +79,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding)
use_explicit_padding, inplace_batchnorm_update)
if self._depth_multiplier != 1.0:
raise ValueError('Only depth 1.0 is supported, found: {}'.
format(self._depth_multiplier))
......@@ -110,7 +116,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
filtered_image_features[feature_name] = feature
return filtered_image_features
def extract_features(self, preprocessed_inputs):
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......@@ -176,7 +182,8 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""Resnet50 v1 FPN Feature Extractor for SSD Models.
Args:
......@@ -194,11 +201,17 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding)
batch_norm_trainable, reuse_weights, use_explicit_padding,
inplace_batchnorm_update)
class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
......@@ -212,7 +225,8 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""Resnet101 v1 FPN Feature Extractor for SSD Models.
Args:
......@@ -230,11 +244,17 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding)
batch_norm_trainable, reuse_weights, use_explicit_padding,
inplace_batchnorm_update)
class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
......@@ -248,7 +268,8 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""Resnet152 v1 FPN Feature Extractor for SSD Models.
Args:
......@@ -266,8 +287,14 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding)
batch_norm_trainable, reuse_weights, use_explicit_padding,
inplace_batchnorm_update)
......@@ -68,4 +68,8 @@ message EvalConfig {
// Whether to keep image identifier in filename when exported to
// visualization_export_dir.
optional bool keep_image_id_for_visualization_export = 19 [default=false];
// Whether to retain original images (i.e. not pre-processed) in the tensor
// dictionary, so that they can be displayed in Tensorboard.
optional bool retain_original_images = 23 [default=true];
}
......@@ -131,6 +131,12 @@ message FasterRcnn {
// to use sigmoid loss and enable merge_multiple_label_boxes.
// If not specified, Softmax loss is used as default.
optional ClassificationLoss second_stage_classification_loss = 29;
// Whether to update batch_norm inplace during training. This is required
// for batch norm to work correctly on TPUs. When this is false, user must add
// a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
// to update the batch norm moving average parameters.
optional bool inplace_batchnorm_update = 30 [default = false];
}
......
......@@ -51,6 +51,9 @@ message InputReader {
// Number of reader instances to create.
optional uint32 num_readers = 6 [default=32];
// Number of records to read from each reader at once.
optional uint32 read_block_length = 15 [default=32];
// Number of decoded records to prefetch before batching.
optional uint32 prefetch_size = 13 [default = 512];
......
......@@ -59,6 +59,12 @@ message Ssd {
// Loss configuration for training.
optional Loss loss = 11;
// Whether to update batch_norm inplace during training. This is required
// for batch norm to work correctly on TPUs. When this is false, user must add
// a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
// to update the batch norm moving average parameters.
optional bool inplace_batchnorm_update = 15 [default = false];
}
......
......@@ -94,4 +94,9 @@ message TrainConfig {
// Whether to remove padding along `num_boxes` dimension of the groundtruth
// tensors.
optional bool unpad_groundtruth_tensors = 21 [default=true];
// Whether to retain original images (i.e. not pre-processed) in the tensor
// dictionary, so that they can be displayed in Tensorboard. Note that this
// will lead to a larger memory footprint.
optional bool retain_original_images = 23 [default=false];
}
......@@ -264,29 +264,6 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
total_num_replicas=worker_replicas)
sync_optimizer = training_optimizer
# Create ops required to initialize the model from a given checkpoint.
init_fn = None
if train_config.fine_tune_checkpoint:
if not train_config.fine_tune_checkpoint_type:
# train_config.from_detection_checkpoint field is deprecated. For
# backward compatibility, fine_tune_checkpoint_type is set based on
# from_detection_checkpoint.
if train_config.from_detection_checkpoint:
train_config.fine_tune_checkpoint_type = 'detection'
else:
train_config.fine_tune_checkpoint_type = 'classification'
var_map = detection_model.restore_map(
fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
load_all_detection_checkpoint_vars=(
train_config.load_all_detection_checkpoint_vars))
available_var_map = (variables_helper.
get_variables_available_in_checkpoint(
var_map, train_config.fine_tune_checkpoint))
init_saver = tf.train.Saver(available_var_map)
def initializer_fn(sess):
init_saver.restore(sess, train_config.fine_tune_checkpoint)
init_fn = initializer_fn
with tf.device(deploy_config.optimizer_device()):
regularization_losses = (None if train_config.add_regularization_loss
else [])
......@@ -354,6 +331,29 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
saver = tf.train.Saver(
keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
# Create ops required to initialize the model from a given checkpoint.
init_fn = None
if train_config.fine_tune_checkpoint:
if not train_config.fine_tune_checkpoint_type:
# train_config.from_detection_checkpoint field is deprecated. For
# backward compatibility, fine_tune_checkpoint_type is set based on
# from_detection_checkpoint.
if train_config.from_detection_checkpoint:
train_config.fine_tune_checkpoint_type = 'detection'
else:
train_config.fine_tune_checkpoint_type = 'classification'
var_map = detection_model.restore_map(
fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
load_all_detection_checkpoint_vars=(
train_config.load_all_detection_checkpoint_vars))
available_var_map = (variables_helper.
get_variables_available_in_checkpoint(
var_map, train_config.fine_tune_checkpoint))
init_saver = tf.train.Saver(available_var_map)
def initializer_fn(sess):
init_saver.restore(sess, train_config.fine_tune_checkpoint)
init_fn = initializer_fn
slim.learning.train(
train_tensor,
logdir=train_dir,
......
......@@ -14,10 +14,13 @@
# ==============================================================================
"""Functions for reading and updating configuration files."""
import os
import tensorflow as tf
from google.protobuf import text_format
from tensorflow.python.lib.io import file_io
from object_detection.protos import eval_pb2
from object_detection.protos import input_reader_pb2
from object_detection.protos import model_pb2
......@@ -119,6 +122,24 @@ def create_pipeline_proto_from_configs(configs):
return pipeline_config
def save_pipeline_config(pipeline_config, directory):
"""Saves a pipeline config text file to disk.
Args:
pipeline_config: A pipeline_pb2.TrainEvalPipelineConfig.
directory: The model directory into which the pipeline config file will be
saved.
"""
if not file_io.file_exists(directory):
file_io.recursive_create_dir(directory)
pipeline_config_path = os.path.join(directory, "pipeline.config")
config_text = text_format.MessageToString(pipeline_config)
with tf.gfile.Open(pipeline_config_path, "wb") as f:
tf.logging.info("Writing pipeline config file to %s",
pipeline_config_path)
f.write(config_text)
def get_configs_from_multiple_files(model_config_path="",
train_config_path="",
train_input_config_path="",
......
......@@ -110,6 +110,23 @@ class ConfigUtilTest(tf.test.TestCase):
config_util.create_pipeline_proto_from_configs(configs))
self.assertEqual(pipeline_config, pipeline_config_reconstructed)
def test_save_pipeline_config(self):
"""Tests that the pipeline config is properly saved to disk."""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.model.faster_rcnn.num_classes = 10
pipeline_config.train_config.batch_size = 32
pipeline_config.train_input_reader.label_map_path = "path/to/label_map"
pipeline_config.eval_config.num_examples = 20
pipeline_config.eval_input_reader.queue_capacity = 100
config_util.save_pipeline_config(pipeline_config, self.get_temp_dir())
configs = config_util.get_configs_from_pipeline_file(
os.path.join(self.get_temp_dir(), "pipeline.config"))
pipeline_config_reconstructed = (
config_util.create_pipeline_proto_from_configs(configs))
self.assertEqual(pipeline_config, pipeline_config_reconstructed)
def test_get_configs_from_multiple_files(self):
"""Tests that proto configs can be read from multiple files."""
temp_dir = self.get_temp_dir()
......
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Python context management helper."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class IdentityContextManager(object):
"""Returns an identity context manager that does nothing.
This is helpful in setting up conditional `with` statement as below:
with slim.arg_scope(x) if use_slim_scope else IdentityContextManager():
do_stuff()
"""
def __enter__(self):
return None
def __exit__(self, exec_type, exec_value, traceback):
del exec_type
del exec_value
del traceback
return False
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.object_detection.utils.context_manager."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from object_detection.utils import context_manager
class ContextManagerTest(tf.test.TestCase):
def test_identity_context_manager(self):
with context_manager.IdentityContextManager() as identity_context:
self.assertIsNone(identity_context)
if __name__ == '__main__':
tf.test.main()
......@@ -123,11 +123,16 @@ def read_dataset(file_read_func, decode_func, input_files, config):
if config.shuffle:
filename_dataset = filename_dataset.shuffle(
config.filenames_shuffle_buffer_size)
elif config.num_readers > 1:
tf.logging.warning('`shuffle` is false, but the input data stream is '
'still slightly shuffled since `num_readers` > 1.')
filename_dataset = filename_dataset.repeat(config.num_epochs or None)
records_dataset = filename_dataset.apply(
tf.contrib.data.parallel_interleave(
file_read_func, cycle_length=config.num_readers, sloppy=True))
file_read_func, cycle_length=config.num_readers,
block_length=config.read_block_length, sloppy=True))
if config.shuffle:
records_dataset.shuffle(config.shuffle_buffer_size)
tensor_dataset = records_dataset.map(
......
......@@ -180,26 +180,24 @@ def pad_to_multiple(tensor, multiple):
padded_tensor_width = int(
math.ceil(float(tensor_width) / multiple) * multiple)
if (padded_tensor_height == tensor_height and
padded_tensor_width == tensor_width):
return tensor
if tensor_depth is None:
tensor_depth = tf.shape(tensor)[3]
# Use tf.concat instead of tf.pad to preserve static shape
height_pad = tf.zeros([
batch_size, padded_tensor_height - tensor_height, tensor_width,
tensor_depth
])
padded_tensor = tf.concat([tensor, height_pad], 1)
width_pad = tf.zeros([
batch_size, padded_tensor_height, padded_tensor_width - tensor_width,
tensor_depth
])
padded_tensor = tf.concat([padded_tensor, width_pad], 2)
return padded_tensor
if padded_tensor_height != tensor_height:
height_pad = tf.zeros([
batch_size, padded_tensor_height - tensor_height, tensor_width,
tensor_depth
])
tensor = tf.concat([tensor, height_pad], 1)
if padded_tensor_width != tensor_width:
width_pad = tf.zeros([
batch_size, padded_tensor_height, padded_tensor_width - tensor_width,
tensor_depth
])
tensor = tf.concat([tensor, width_pad], 2)
return tensor
def padded_one_hot_encoding(indices, depth, left_pad):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment