Unverified Commit ed4e22b8 authored by pkulzc's avatar pkulzc Committed by GitHub
Browse files

Merge pull request #3973 from pkulzc/master

Object detection internal changes
parents cac90a0e 13b89b93
...@@ -42,12 +42,11 @@ class SSDFeatureExtractor(object): ...@@ -42,12 +42,11 @@ class SSDFeatureExtractor(object):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""Constructor. """Constructor.
Args: Args:
...@@ -56,30 +55,27 @@ class SSDFeatureExtractor(object): ...@@ -56,30 +55,27 @@ class SSDFeatureExtractor(object):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: whether to reuse variables. Default is None. reuse_weights: whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch norm moving average override_base_feature_extractor_hyperparams: Whether to override
values inplace. When this is false train op must add a control hyperparameters of the base feature extractor with the one from
dependency on tf.graphkeys.UPDATE_OPS collection in order to update `conv_hyperparams_fn`.
batch norm statistics.
""" """
self._is_training = is_training self._is_training = is_training
self._depth_multiplier = depth_multiplier self._depth_multiplier = depth_multiplier
self._min_depth = min_depth self._min_depth = min_depth
self._pad_to_multiple = pad_to_multiple self._pad_to_multiple = pad_to_multiple
self._conv_hyperparams = conv_hyperparams self._conv_hyperparams_fn = conv_hyperparams_fn
self._batch_norm_trainable = batch_norm_trainable
self._inplace_batchnorm_update = inplace_batchnorm_update
self._reuse_weights = reuse_weights self._reuse_weights = reuse_weights
self._use_explicit_padding = use_explicit_padding self._use_explicit_padding = use_explicit_padding
self._use_depthwise = use_depthwise self._use_depthwise = use_depthwise
self._override_base_feature_extractor_hyperparams = (
override_base_feature_extractor_hyperparams)
@abstractmethod @abstractmethod
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
...@@ -106,28 +102,6 @@ class SSDFeatureExtractor(object): ...@@ -106,28 +102,6 @@ class SSDFeatureExtractor(object):
This function is responsible for extracting feature maps from preprocessed This function is responsible for extracting feature maps from preprocessed
images. images.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
batchnorm_updates_collections = (None if self._inplace_batchnorm_update
else tf.GraphKeys.UPDATE_OPS)
with slim.arg_scope([slim.batch_norm],
updates_collections=batchnorm_updates_collections):
return self._extract_features(preprocessed_inputs)
@abstractmethod
def _extract_features(self, preprocessed_inputs):
"""Extracts features from preprocessed inputs.
This function is responsible for extracting feature maps from preprocessed
images.
Args: Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images. representing a batch of images.
...@@ -162,7 +136,10 @@ class SSDMetaArch(model.DetectionModel): ...@@ -162,7 +136,10 @@ class SSDMetaArch(model.DetectionModel):
normalize_loss_by_num_matches, normalize_loss_by_num_matches,
hard_example_miner, hard_example_miner,
add_summaries=True, add_summaries=True,
normalize_loc_loss_by_codesize=False): normalize_loc_loss_by_codesize=False,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
add_background_class=True):
"""SSDMetaArch Constructor. """SSDMetaArch Constructor.
TODO(rathodv,jonathanhuang): group NMS parameters + score converter into TODO(rathodv,jonathanhuang): group NMS parameters + score converter into
...@@ -209,9 +186,23 @@ class SSDMetaArch(model.DetectionModel): ...@@ -209,9 +186,23 @@ class SSDMetaArch(model.DetectionModel):
should be added to tensorflow graph. should be added to tensorflow graph.
normalize_loc_loss_by_codesize: whether to normalize localization loss normalize_loc_loss_by_codesize: whether to normalize localization loss
by code size of the box encoder. by code size of the box encoder.
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
add_background_class: Whether to add an implicit background class to
one-hot encodings of groundtruth labels. Set to false if using
groundtruth labels with an explicit background class or using multiclass
scores instead of truth in the case of distillation.
""" """
super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes) super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes)
self._is_training = is_training self._is_training = is_training
self._freeze_batchnorm = freeze_batchnorm
self._inplace_batchnorm_update = inplace_batchnorm_update
# Needed for fine-tuning from classification checkpoints whose # Needed for fine-tuning from classification checkpoints whose
# variables do not have the feature extractor scope. # variables do not have the feature extractor scope.
...@@ -224,6 +215,7 @@ class SSDMetaArch(model.DetectionModel): ...@@ -224,6 +215,7 @@ class SSDMetaArch(model.DetectionModel):
self._feature_extractor = feature_extractor self._feature_extractor = feature_extractor
self._matcher = matcher self._matcher = matcher
self._region_similarity_calculator = region_similarity_calculator self._region_similarity_calculator = region_similarity_calculator
self._add_background_class = add_background_class
# TODO(jonathanhuang): handle agnostic mode # TODO(jonathanhuang): handle agnostic mode
# weights # weights
...@@ -255,6 +247,7 @@ class SSDMetaArch(model.DetectionModel): ...@@ -255,6 +247,7 @@ class SSDMetaArch(model.DetectionModel):
self._anchors = None self._anchors = None
self._add_summaries = add_summaries self._add_summaries = add_summaries
self._batched_prediction_tensor_names = []
@property @property
def anchors(self): def anchors(self):
...@@ -264,6 +257,13 @@ class SSDMetaArch(model.DetectionModel): ...@@ -264,6 +257,13 @@ class SSDMetaArch(model.DetectionModel):
raise RuntimeError('anchors should be a BoxList object, but is not.') raise RuntimeError('anchors should be a BoxList object, but is not.')
return self._anchors return self._anchors
@property
def batched_prediction_tensor_names(self):
  """Names of the batched tensors produced by the last `predict()` call.

  The list is populated by `predict()` from the keys of its returned
  predictions dictionary, excluding 'anchors' (which is not batched).

  Returns:
    A list of string tensor names.

  Raises:
    RuntimeError: if `predict()` has not been called yet, i.e. the name
      list is still empty.
  """
  if not self._batched_prediction_tensor_names:
    raise RuntimeError('Must call predict() method to get batched prediction '
                       'tensor names.')
  return self._batched_prediction_tensor_names
def preprocess(self, inputs): def preprocess(self, inputs):
"""Feature-extractor specific preprocessing. """Feature-extractor specific preprocessing.
...@@ -372,32 +372,42 @@ class SSDMetaArch(model.DetectionModel): ...@@ -372,32 +372,42 @@ class SSDMetaArch(model.DetectionModel):
5) anchors: 2-D float tensor of shape [num_anchors, 4] containing 5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
the generated anchors in normalized coordinates. the generated anchors in normalized coordinates.
""" """
with tf.variable_scope(None, self._extract_features_scope, batchnorm_updates_collections = (None if self._inplace_batchnorm_update
[preprocessed_inputs]): else tf.GraphKeys.UPDATE_OPS)
feature_maps = self._feature_extractor.extract_features( with slim.arg_scope([slim.batch_norm],
is_training=(self._is_training and
not self._freeze_batchnorm),
updates_collections=batchnorm_updates_collections):
with tf.variable_scope(None, self._extract_features_scope,
[preprocessed_inputs]):
feature_maps = self._feature_extractor.extract_features(
preprocessed_inputs)
feature_map_spatial_dims = self._get_feature_map_spatial_dims(
feature_maps)
image_shape = shape_utils.combined_static_and_dynamic_shape(
preprocessed_inputs) preprocessed_inputs)
feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps) self._anchors = box_list_ops.concatenate(
image_shape = shape_utils.combined_static_and_dynamic_shape( self._anchor_generator.generate(
preprocessed_inputs) feature_map_spatial_dims,
self._anchors = box_list_ops.concatenate( im_height=image_shape[1],
self._anchor_generator.generate( im_width=image_shape[2]))
feature_map_spatial_dims, prediction_dict = self._box_predictor.predict(
im_height=image_shape[1], feature_maps, self._anchor_generator.num_anchors_per_location())
im_width=image_shape[2])) box_encodings = tf.squeeze(
prediction_dict = self._box_predictor.predict( tf.concat(prediction_dict['box_encodings'], axis=1), axis=2)
feature_maps, self._anchor_generator.num_anchors_per_location()) class_predictions_with_background = tf.concat(
box_encodings = tf.squeeze( prediction_dict['class_predictions_with_background'], axis=1)
tf.concat(prediction_dict['box_encodings'], axis=1), axis=2) predictions_dict = {
class_predictions_with_background = tf.concat( 'preprocessed_inputs': preprocessed_inputs,
prediction_dict['class_predictions_with_background'], axis=1) 'box_encodings': box_encodings,
predictions_dict = { 'class_predictions_with_background':
'preprocessed_inputs': preprocessed_inputs, class_predictions_with_background,
'box_encodings': box_encodings, 'feature_maps': feature_maps,
'class_predictions_with_background': class_predictions_with_background, 'anchors': self._anchors.get()
'feature_maps': feature_maps, }
'anchors': self._anchors.get() self._batched_prediction_tensor_names = [x for x in predictions_dict
} if x != 'anchors']
return predictions_dict return predictions_dict
def _get_feature_map_spatial_dims(self, feature_maps): def _get_feature_map_spatial_dims(self, feature_maps):
"""Return list of spatial dimensions for each feature map in a list. """Return list of spatial dimensions for each feature map in a list.
...@@ -578,8 +588,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -578,8 +588,8 @@ class SSDMetaArch(model.DetectionModel):
name='classification_loss') name='classification_loss')
loss_dict = { loss_dict = {
localization_loss.op.name: localization_loss, str(localization_loss.op.name): localization_loss,
classification_loss.op.name: classification_loss str(classification_loss.op.name): classification_loss
} }
return loss_dict return loss_dict
...@@ -632,10 +642,14 @@ class SSDMetaArch(model.DetectionModel): ...@@ -632,10 +642,14 @@ class SSDMetaArch(model.DetectionModel):
groundtruth_boxlists = [ groundtruth_boxlists = [
box_list.BoxList(boxes) for boxes in groundtruth_boxes_list box_list.BoxList(boxes) for boxes in groundtruth_boxes_list
] ]
groundtruth_classes_with_background_list = [ if self._add_background_class:
tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT') groundtruth_classes_with_background_list = [
for one_hot_encoding in groundtruth_classes_list tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT')
] for one_hot_encoding in groundtruth_classes_list
]
else:
groundtruth_classes_with_background_list = groundtruth_classes_list
if groundtruth_keypoints_list is not None: if groundtruth_keypoints_list is not None:
for boxlist, keypoints in zip( for boxlist, keypoints in zip(
groundtruth_boxlists, groundtruth_keypoints_list): groundtruth_boxlists, groundtruth_keypoints_list):
......
...@@ -38,8 +38,7 @@ class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -38,8 +38,7 @@ class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier=0, depth_multiplier=0,
min_depth=0, min_depth=0,
pad_to_multiple=1, pad_to_multiple=1,
batch_norm_trainable=True, conv_hyperparams_fn=None)
conv_hyperparams=None)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
return tf.identity(resized_inputs) return tf.identity(resized_inputs)
...@@ -81,8 +80,10 @@ def _get_value_for_matching_key(dictionary, suffix): ...@@ -81,8 +80,10 @@ def _get_value_for_matching_key(dictionary, suffix):
class SsdMetaArchTest(test_case.TestCase): class SsdMetaArchTest(test_case.TestCase):
def _create_model(self, apply_hard_mining=True, def _create_model(self,
normalize_loc_loss_by_codesize=False): apply_hard_mining=True,
normalize_loc_loss_by_codesize=False,
add_background_class=True):
is_training = False is_training = False
num_classes = 1 num_classes = 1
mock_anchor_generator = MockAnchorGenerator2x2() mock_anchor_generator = MockAnchorGenerator2x2()
...@@ -118,13 +119,29 @@ class SsdMetaArchTest(test_case.TestCase): ...@@ -118,13 +119,29 @@ class SsdMetaArchTest(test_case.TestCase):
code_size = 4 code_size = 4
model = ssd_meta_arch.SSDMetaArch( model = ssd_meta_arch.SSDMetaArch(
is_training, mock_anchor_generator, mock_box_predictor, mock_box_coder, is_training,
fake_feature_extractor, mock_matcher, region_similarity_calculator, mock_anchor_generator,
encode_background_as_zeros, negative_class_weight, image_resizer_fn, mock_box_predictor,
non_max_suppression_fn, tf.identity, classification_loss, mock_box_coder,
localization_loss, classification_loss_weight, localization_loss_weight, fake_feature_extractor,
normalize_loss_by_num_matches, hard_example_miner, add_summaries=False, mock_matcher,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize) region_similarity_calculator,
encode_background_as_zeros,
negative_class_weight,
image_resizer_fn,
non_max_suppression_fn,
tf.identity,
classification_loss,
localization_loss,
classification_loss_weight,
localization_loss_weight,
normalize_loss_by_num_matches,
hard_example_miner,
add_summaries=False,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
add_background_class=add_background_class)
return model, num_classes, mock_anchor_generator.num_anchors(), code_size return model, num_classes, mock_anchor_generator.num_anchors(), code_size
def test_preprocess_preserves_shapes_with_dynamic_input_image(self): def test_preprocess_preserves_shapes_with_dynamic_input_image(self):
...@@ -365,6 +382,43 @@ class SsdMetaArchTest(test_case.TestCase): ...@@ -365,6 +382,43 @@ class SsdMetaArchTest(test_case.TestCase):
self.assertAllClose(localization_loss, expected_localization_loss) self.assertAllClose(localization_loss, expected_localization_loss)
self.assertAllClose(classification_loss, expected_classification_loss) self.assertAllClose(classification_loss, expected_classification_loss)
def test_loss_results_are_correct_without_add_background_class(self):
  """Checks SSD losses when no implicit background class is added.

  With add_background_class=False the groundtruth one-hot vectors are used
  as-is (num_classes + 1 columns here), so the model's class predictions
  are scored directly against them.
  """
  # Build a throwaway model in a scratch graph just to read back the
  # anchor/class counts used in the expected-loss arithmetic below.
  with tf.Graph().as_default():
    _, num_classes, num_anchors, _ = self._create_model(
        add_background_class=False)

  def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2,
               groundtruth_classes1, groundtruth_classes2):
    # Hard-example mining is disabled so every anchor contributes to the
    # classification loss, making the expected value below exact.
    groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2]
    groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2]
    model, _, _, _ = self._create_model(
        apply_hard_mining=False, add_background_class=False)
    model.provide_groundtruth(groundtruth_boxes_list,
                              groundtruth_classes_list)
    prediction_dict = model.predict(
        preprocessed_tensor, true_image_shapes=None)
    loss_dict = model.loss(prediction_dict, true_image_shapes=None)
    return (loss_dict['Loss/localization_loss'],
            loss_dict['Loss/classification_loss'])

  batch_size = 2
  preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
  groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32)
  groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32)
  # Explicit one-hot labels including the background column (no implicit
  # background is prepended when add_background_class=False).
  groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32)
  groundtruth_classes2 = np.array([[0, 1]], dtype=np.float32)
  expected_localization_loss = 0.0
  # Each of the (num_classes + 1) logits contributes log(2) of sigmoid
  # cross-entropy per anchor when predictions are uniform — TODO confirm
  # this matches the mock predictor's output used by _create_model.
  expected_classification_loss = (
      batch_size * num_anchors * (num_classes + 1) * np.log(2.0))
  (localization_loss, classification_loss) = self.execute(
      graph_fn, [
          preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
          groundtruth_classes1, groundtruth_classes2
      ])
  self.assertAllClose(localization_loss, expected_localization_loss)
  self.assertAllClose(classification_loss, expected_classification_loss)
def test_restore_map_for_detection_ckpt(self): def test_restore_map_for_detection_ckpt(self):
model, _, _, _ = self._create_model() model, _, _, _ = self._create_model()
model.predict(tf.constant(np.array([[[0, 0], [1, 1]], [[1, 0], [0, 1]]], model.predict(tf.constant(np.array([[[0, 0], [1, 1]], [[1, 0], [0, 1]]],
......
...@@ -12,13 +12,7 @@ ...@@ -12,13 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
r"""Creates and runs `Experiment` for object detection model. r"""Constructs model, inputs, and training environment."""
This uses the TF.learn framework to define and run an object detection model
wrapped in an `Estimator`.
Note that this module is only compatible with SSD Meta architecture at the
moment.
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -29,13 +23,8 @@ import os ...@@ -29,13 +23,8 @@ import os
import tensorflow as tf import tensorflow as tf
from google.protobuf import text_format
from tensorflow.contrib.learn.python.learn import learn_runner
from tensorflow.contrib.tpu.python.tpu import tpu_optimizer
from tensorflow.python.lib.io import file_io
from object_detection import eval_util from object_detection import eval_util
from object_detection import inputs from object_detection import inputs
from object_detection import model_hparams
from object_detection.builders import model_builder from object_detection.builders import model_builder
from object_detection.builders import optimizer_builder from object_detection.builders import optimizer_builder
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
...@@ -45,15 +34,6 @@ from object_detection.utils import shape_utils ...@@ -45,15 +34,6 @@ from object_detection.utils import shape_utils
from object_detection.utils import variables_helper from object_detection.utils import variables_helper
from object_detection.utils import visualization_utils as vis_utils from object_detection.utils import visualization_utils as vis_utils
tf.flags.DEFINE_string('model_dir', None, 'Path to output model directory '
'where event and checkpoint files will be written.')
tf.flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
'file.')
tf.flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
tf.flags.DEFINE_integer('num_eval_steps', None, 'Number of train steps.')
FLAGS = tf.flags.FLAGS
# A map of names to methods that help build the model. # A map of names to methods that help build the model.
MODEL_BUILD_UTIL_MAP = { MODEL_BUILD_UTIL_MAP = {
'get_configs_from_pipeline_file': 'get_configs_from_pipeline_file':
...@@ -297,17 +277,20 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -297,17 +277,20 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
regularization_loss = tf.add_n(regularization_losses, regularization_loss = tf.add_n(regularization_losses,
name='regularization_loss') name='regularization_loss')
losses.append(regularization_loss) losses.append(regularization_loss)
if not use_tpu: losses_dict['Loss/regularization_loss'] = regularization_loss
tf.summary.scalar('regularization_loss', regularization_loss)
total_loss = tf.add_n(losses, name='total_loss') total_loss = tf.add_n(losses, name='total_loss')
losses_dict['Loss/total_loss'] = total_loss
if mode == tf.estimator.ModeKeys.TRAIN: if mode in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL]:
# TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
# can write learning rate summaries on TPU without host calls.
global_step = tf.train.get_or_create_global_step() global_step = tf.train.get_or_create_global_step()
training_optimizer, optimizer_summary_vars = optimizer_builder.build( training_optimizer, optimizer_summary_vars = optimizer_builder.build(
train_config.optimizer) train_config.optimizer)
if mode == tf.estimator.ModeKeys.TRAIN:
if use_tpu: if use_tpu:
training_optimizer = tpu_optimizer.CrossShardOptimizer( training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
training_optimizer) training_optimizer)
# Optionally freeze some layers by setting their gradients to be zero. # Optionally freeze some layers by setting their gradients to be zero.
...@@ -380,9 +363,14 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -380,9 +363,14 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators( eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_metrics, category_index.values(), eval_dict, eval_metrics, category_index.values(), eval_dict,
include_metrics_per_category=False) include_metrics_per_category=False)
for loss_key, loss_tensor in iter(losses_dict.items()):
eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
for var in optimizer_summary_vars:
eval_metric_ops[var.op.name] = (var, tf.no_op())
if img_summary is not None: if img_summary is not None:
eval_metric_ops['Detections_Left_Groundtruth_Right'] = ( eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
img_summary, tf.no_op()) img_summary, tf.no_op())
eval_metric_ops = {str(k): v for k, v in eval_metric_ops.iteritems()}
if use_tpu: if use_tpu:
return tf.contrib.tpu.TPUEstimatorSpec( return tf.contrib.tpu.TPUEstimatorSpec(
...@@ -405,33 +393,18 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -405,33 +393,18 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
return model_fn return model_fn
def build_experiment_fn(train_steps, eval_steps): def create_estimator_and_inputs(run_config,
"""Returns a function that creates an `Experiment`.""" hparams,
pipeline_config_path,
def build_experiment(run_config, hparams): train_steps=None,
"""Builds an `Experiment` from configuration and hyperparameters. eval_steps=None,
model_fn_creator=create_model_fn,
Args: use_tpu_estimator=False,
run_config: A `RunConfig`. use_tpu=False,
hparams: A `HParams`. num_shards=1,
params=None,
Returns: **kwargs):
An `Experiment` object. """Creates `Estimator`, input functions, and steps.
"""
return populate_experiment(run_config, hparams, FLAGS.pipeline_config_path,
train_steps, eval_steps)
return build_experiment
def populate_experiment(run_config,
hparams,
pipeline_config_path,
train_steps=None,
eval_steps=None,
model_fn_creator=create_model_fn,
**kwargs):
"""Populates an `Experiment` object.
Args: Args:
run_config: A `RunConfig`. run_config: A `RunConfig`.
...@@ -451,18 +424,34 @@ def populate_experiment(run_config, ...@@ -451,18 +424,34 @@ def populate_experiment(run_config,
* Returns: * Returns:
`model_fn` for `Estimator`. `model_fn` for `Estimator`.
use_tpu_estimator: Whether a `TPUEstimator` should be returned. If False,
an `Estimator` will be returned.
use_tpu: Boolean, whether training and evaluation should run on TPU. Only
used if `use_tpu_estimator` is True.
num_shards: Number of shards (TPU cores). Only used if `use_tpu_estimator`
is True.
params: Parameter dictionary passed from the estimator. Only used if
`use_tpu_estimator` is True.
**kwargs: Additional keyword arguments for configuration override. **kwargs: Additional keyword arguments for configuration override.
Returns: Returns:
An `Experiment` that defines all aspects of training, evaluation, and A dictionary with the following fields:
export. 'estimator': An `Estimator` or `TPUEstimator`.
'train_input_fn': A training input function.
'eval_input_fn': An evaluation input function.
'eval_on_train_input_fn': An evaluation-on-train input function.
'predict_input_fn': A prediction input function.
'train_steps': Number of training steps. Either directly from input or from
configuration.
'eval_steps': Number of evaluation steps. Either directly from input or from
configuration.
""" """
get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[ get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
'get_configs_from_pipeline_file'] 'get_configs_from_pipeline_file']
create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
'create_pipeline_proto_from_configs']
merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[ merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
'merge_external_params_with_configs'] 'merge_external_params_with_configs']
create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
'create_pipeline_proto_from_configs']
create_train_input_fn = MODEL_BUILD_UTIL_MAP['create_train_input_fn'] create_train_input_fn = MODEL_BUILD_UTIL_MAP['create_train_input_fn']
create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn'] create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn']
create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn'] create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn']
...@@ -480,16 +469,16 @@ def populate_experiment(run_config, ...@@ -480,16 +469,16 @@ def populate_experiment(run_config,
eval_config = configs['eval_config'] eval_config = configs['eval_config']
eval_input_config = configs['eval_input_config'] eval_input_config = configs['eval_input_config']
if train_steps is None and train_config.num_steps: if train_steps is None:
train_steps = train_config.num_steps train_steps = configs['train_config'].num_steps
if eval_steps is None and eval_config.num_examples: if eval_steps is None:
eval_steps = eval_config.num_examples eval_steps = configs['eval_config'].num_examples
detection_model_fn = functools.partial( detection_model_fn = functools.partial(
model_builder.build, model_config=model_config) model_builder.build, model_config=model_config)
# Create the input functions for TRAIN/EVAL. # Create the input functions for TRAIN/EVAL/PREDICT.
train_input_fn = create_train_input_fn( train_input_fn = create_train_input_fn(
train_config=train_config, train_config=train_config,
train_input_config=train_input_config, train_input_config=train_input_config,
...@@ -498,51 +487,200 @@ def populate_experiment(run_config, ...@@ -498,51 +487,200 @@ def populate_experiment(run_config,
eval_config=eval_config, eval_config=eval_config,
eval_input_config=eval_input_config, eval_input_config=eval_input_config,
model_config=model_config) model_config=model_config)
eval_on_train_input_fn = create_eval_input_fn(
eval_config=eval_config,
eval_input_config=train_input_config,
model_config=model_config)
predict_input_fn = create_predict_input_fn(model_config=model_config)
model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu)
if use_tpu_estimator:
estimator = tf.contrib.tpu.TPUEstimator(
model_fn=model_fn,
train_batch_size=train_config.batch_size,
# For each core, only batch size 1 is supported for eval.
eval_batch_size=num_shards * 1 if use_tpu else 1,
use_tpu=use_tpu,
config=run_config,
params=params if params else {})
else:
estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
export_strategies = [ # Write the as-run pipeline config to disk.
tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
serving_input_fn=create_predict_input_fn(
model_config=model_config))
]
estimator = tf.estimator.Estimator(
model_fn=model_fn_creator(detection_model_fn, configs, hparams),
config=run_config)
if run_config.is_chief: if run_config.is_chief:
# Store the final pipeline config for traceability.
pipeline_config_final = create_pipeline_proto_from_configs( pipeline_config_final = create_pipeline_proto_from_configs(
configs) configs)
if not file_io.file_exists(estimator.model_dir): config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir)
file_io.recursive_create_dir(estimator.model_dir)
pipeline_config_final_path = os.path.join(estimator.model_dir,
'pipeline.config')
config_text = text_format.MessageToString(pipeline_config_final)
with tf.gfile.Open(pipeline_config_final_path, 'wb') as f:
tf.logging.info('Writing as-run pipeline config file to %s',
pipeline_config_final_path)
f.write(config_text)
return tf.contrib.learn.Experiment( return dict(
estimator=estimator, estimator=estimator,
train_input_fn=train_input_fn, train_input_fn=train_input_fn,
eval_input_fn=eval_input_fn, eval_input_fn=eval_input_fn,
eval_on_train_input_fn=eval_on_train_input_fn,
predict_input_fn=predict_input_fn,
train_steps=train_steps, train_steps=train_steps,
eval_steps=eval_steps, eval_steps=eval_steps)
export_strategies=export_strategies,
eval_delay_secs=120,)
def create_train_and_eval_specs(train_input_fn,
                                eval_input_fn,
                                eval_on_train_input_fn,
                                predict_input_fn,
                                train_steps,
                                eval_steps,
                                eval_on_train_data=False,
                                final_exporter_name='Servo',
                                eval_spec_name='eval'):
  """Builds the `TrainSpec` and `EvalSpec`s for `tf.estimator.train_and_evaluate`.

  Args:
    train_input_fn: Function that produces features and labels on train data.
    eval_input_fn: Function that produces features and labels on eval data.
    eval_on_train_input_fn: Function that produces features and labels for
      evaluation on train data.
    predict_input_fn: Function that produces features for inference.
    train_steps: Number of training steps.
    eval_steps: Number of eval steps.
    eval_on_train_data: Whether to also evaluate the model on training data.
      Default is False.
    final_exporter_name: String name given to `FinalExporter`.
    eval_spec_name: String name given to the main `EvalSpec`.

  Returns:
    Tuple of `TrainSpec` and list of `EvalSpec`s. The first `EvalSpec` covers
    the evaluation data; when `eval_on_train_data` is True a second `EvalSpec`
    named 'eval_on_train' covering the training data is appended.
  """
  train_spec = tf.estimator.TrainSpec(
      input_fn=train_input_fn, max_steps=train_steps)

  # The final exporter is attached only to the main eval spec so a single
  # SavedModel is exported at the end of training.
  final_exporter = tf.estimator.FinalExporter(
      name=final_exporter_name,
      serving_input_receiver_fn=predict_input_fn)
  eval_specs = [
      tf.estimator.EvalSpec(
          name=eval_spec_name,
          input_fn=eval_input_fn,
          steps=eval_steps,
          exporters=final_exporter)
  ]

  if eval_on_train_data:
    eval_specs.append(
        tf.estimator.EvalSpec(
            name='eval_on_train',
            input_fn=eval_on_train_input_fn,
            steps=eval_steps))

  return train_spec, eval_specs
def continuous_eval(estimator, model_dir, input_fn, eval_steps, train_steps,
                    name):
  """Perform continuous evaluation on checkpoints written to a model directory.

  Blocks on new checkpoints appearing in `model_dir`, evaluates each one, and
  stops once a checkpoint at or beyond `train_steps` has been evaluated.

  Args:
    estimator: Estimator object to use for evaluation.
    model_dir: Model directory to read checkpoints for continuous evaluation.
    input_fn: Input function to use for evaluation.
    eval_steps: Number of steps to run during each evaluation.
    train_steps: Number of training steps. This is used to infer the last
      checkpoint and stop evaluation loop.
    name: Namescope for eval summary.
  """

  def terminate_eval():
    # NOTE(review): with timeout=None below this callback is never invoked;
    # kept so a finite timeout can be wired in without other changes.
    tf.logging.info('Terminating eval after 180 seconds of no checkpoints')
    return True

  for ckpt in tf.contrib.training.checkpoints_iterator(
      model_dir, min_interval_secs=180, timeout=None,
      timeout_fn=terminate_eval):

    tf.logging.info('Starting Evaluation.')
    try:
      eval_results = estimator.evaluate(
          input_fn=input_fn,
          steps=eval_steps,
          checkpoint_path=ckpt,
          name=name)
      tf.logging.info('Eval results: %s' % eval_results)

      # Terminate eval job when final checkpoint is reached.
      # Checkpoint basenames are assumed to end in '-<global_step>' —
      # TODO confirm against the checkpoint naming used by the trainer.
      current_step = int(os.path.basename(ckpt).split('-')[1])
      if current_step >= train_steps:
        tf.logging.info(
            'Evaluation finished after training step %d' % current_step)
        break

    except tf.errors.NotFoundError:
      # Checkpoints can be garbage-collected between listing and evaluation;
      # skip rather than fail the eval job.
      tf.logging.info(
          'Checkpoint %s no longer exists, skipping checkpoint' % ckpt)
def populate_experiment(run_config,
                        hparams,
                        pipeline_config_path,
                        train_steps=None,
                        eval_steps=None,
                        model_fn_creator=create_model_fn,
                        **kwargs):
  """Populates an `Experiment` object.

  EXPERIMENT CLASS IS DEPRECATED. Please switch to
  tf.estimator.train_and_evaluate. As an example, see model_main.py.

  Args:
    run_config: A `RunConfig`.
    hparams: A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    train_steps: Number of training steps. If None, the number of training
      steps is set from the `TrainConfig` proto.
    eval_steps: Number of evaluation steps per evaluation cycle. If None, the
      number of evaluation steps is set from the `EvalConfig` proto.
    model_fn_creator: A function that creates a `model_fn` for `Estimator`.
      Follows the signature:
      * Args:
        * `detection_model_fn`: Function that returns `DetectionModel` instance.
        * `configs`: Dictionary of pipeline config objects.
        * `hparams`: `HParams` object.
      * Returns:
        `model_fn` for `Estimator`.
    **kwargs: Additional keyword arguments for configuration override.

  Returns:
    An `Experiment` that defines all aspects of training, evaluation, and
    export.
  """
  tf.logging.warning('Experiment is being deprecated. Please use '
                     'tf.estimator.train_and_evaluate(). See model_main.py for '
                     'an example.')
  # Reuse the train_and_evaluate construction path, then repackage its pieces
  # into the legacy Experiment container.
  train_and_eval_dict = create_estimator_and_inputs(
      run_config,
      hparams,
      pipeline_config_path,
      train_steps=train_steps,
      eval_steps=eval_steps,
      model_fn_creator=model_fn_creator,
      **kwargs)
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fn = train_and_eval_dict['eval_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  eval_steps = train_and_eval_dict['eval_steps']

  export_strategies = [
      tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
          serving_input_fn=predict_input_fn)
  ]

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      train_steps=train_steps,
      eval_steps=eval_steps,
      export_strategies=export_strategies,
      eval_delay_secs=120,)
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Tests for object detection model.""" """Tests for object detection model library."""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -24,16 +24,20 @@ import os ...@@ -24,16 +24,20 @@ import os
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu import tpu_config
from tensorflow.contrib.tpu.python.tpu import tpu_estimator
from object_detection import inputs from object_detection import inputs
from object_detection import model
from object_detection import model_hparams from object_detection import model_hparams
from object_detection import model_test_util from object_detection import model_lib
from object_detection.builders import model_builder from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.utils import config_util from object_detection.utils import config_util
MODEL_NAME_FOR_TEST = model_test_util.SSD_INCEPTION_MODEL_NAME # Model for test. Options are:
# 'ssd_inception_v2_pets', 'faster_rcnn_resnet50_pets'
MODEL_NAME_FOR_TEST = 'ssd_inception_v2_pets'
def _get_data_path(): def _get_data_path():
...@@ -42,6 +46,12 @@ def _get_data_path(): ...@@ -42,6 +46,12 @@ def _get_data_path():
'pets_examples.record') 'pets_examples.record')
def get_pipeline_config_path(model_name):
  """Returns path to the local pipeline config file."""
  data_dir = tf.resource_loader.get_data_files_path()
  config_basename = model_name + '.config'
  return os.path.join(data_dir, 'samples', 'configs', config_basename)
def _get_labelmap_path(): def _get_labelmap_path():
"""Returns an absolute path to label map file.""" """Returns an absolute path to label map file."""
return os.path.join(tf.resource_loader.get_data_files_path(), 'data', return os.path.join(tf.resource_loader.get_data_files_path(), 'data',
...@@ -50,7 +60,7 @@ def _get_labelmap_path(): ...@@ -50,7 +60,7 @@ def _get_labelmap_path():
def _get_configs_for_model(model_name): def _get_configs_for_model(model_name):
"""Returns configurations for model.""" """Returns configurations for model."""
filename = model_test_util.GetPipelineConfigPath(model_name) filename = get_pipeline_config_path(model_name)
data_path = _get_data_path() data_path = _get_data_path()
label_map_path = _get_labelmap_path() label_map_path = _get_labelmap_path()
configs = config_util.get_configs_from_pipeline_file(filename) configs = config_util.get_configs_from_pipeline_file(filename)
...@@ -62,31 +72,37 @@ def _get_configs_for_model(model_name): ...@@ -62,31 +72,37 @@ def _get_configs_for_model(model_name):
return configs return configs
def setUpModule(): class ModelLibTest(tf.test.TestCase):
model_test_util.InitializeFlags(MODEL_NAME_FOR_TEST)
class ModelTflearnTest(tf.test.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
tf.reset_default_graph() tf.reset_default_graph()
def _assert_outputs_for_train_eval(self, configs, mode, class_agnostic=False): def _assert_model_fn_for_train_eval(self, configs, mode,
class_agnostic=False):
model_config = configs['model'] model_config = configs['model']
train_config = configs['train_config'] train_config = configs['train_config']
with tf.Graph().as_default(): with tf.Graph().as_default():
if mode == tf.estimator.ModeKeys.TRAIN: if mode == 'train':
features, labels = inputs.create_train_input_fn( features, labels = inputs.create_train_input_fn(
configs['train_config'], configs['train_config'],
configs['train_input_config'], configs['train_input_config'],
configs['model'])() configs['model'])()
model_mode = tf.estimator.ModeKeys.TRAIN
batch_size = train_config.batch_size batch_size = train_config.batch_size
else: elif mode == 'eval':
features, labels = inputs.create_eval_input_fn( features, labels = inputs.create_eval_input_fn(
configs['eval_config'], configs['eval_config'],
configs['eval_input_config'], configs['eval_input_config'],
configs['model'])() configs['model'])()
model_mode = tf.estimator.ModeKeys.EVAL
batch_size = 1
elif mode == 'eval_on_train':
features, labels = inputs.create_eval_input_fn(
configs['eval_config'],
configs['train_input_config'],
configs['model'])()
model_mode = tf.estimator.ModeKeys.EVAL
batch_size = 1 batch_size = 1
detection_model_fn = functools.partial( detection_model_fn = functools.partial(
...@@ -95,8 +111,8 @@ class ModelTflearnTest(tf.test.TestCase): ...@@ -95,8 +111,8 @@ class ModelTflearnTest(tf.test.TestCase):
hparams = model_hparams.create_hparams( hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false') hparams_overrides='load_pretrained=false')
model_fn = model.create_model_fn(detection_model_fn, configs, hparams) model_fn = model_lib.create_model_fn(detection_model_fn, configs, hparams)
estimator_spec = model_fn(features, labels, mode) estimator_spec = model_fn(features, labels, model_mode)
self.assertIsNotNone(estimator_spec.loss) self.assertIsNotNone(estimator_spec.loss)
self.assertIsNotNone(estimator_spec.predictions) self.assertIsNotNone(estimator_spec.predictions)
...@@ -114,11 +130,11 @@ class ModelTflearnTest(tf.test.TestCase): ...@@ -114,11 +130,11 @@ class ModelTflearnTest(tf.test.TestCase):
self.assertEqual(batch_size, detection_scores.shape.as_list()[0]) self.assertEqual(batch_size, detection_scores.shape.as_list()[0])
self.assertEqual(tf.float32, detection_scores.dtype) self.assertEqual(tf.float32, detection_scores.dtype)
self.assertEqual(tf.float32, num_detections.dtype) self.assertEqual(tf.float32, num_detections.dtype)
if mode == tf.estimator.ModeKeys.TRAIN: if model_mode == tf.estimator.ModeKeys.TRAIN:
self.assertIsNotNone(estimator_spec.train_op) self.assertIsNotNone(estimator_spec.train_op)
return estimator_spec return estimator_spec
def _assert_outputs_for_predict(self, configs): def _assert_model_fn_for_predict(self, configs):
model_config = configs['model'] model_config = configs['model']
with tf.Graph().as_default(): with tf.Graph().as_default():
...@@ -132,7 +148,7 @@ class ModelTflearnTest(tf.test.TestCase): ...@@ -132,7 +148,7 @@ class ModelTflearnTest(tf.test.TestCase):
hparams = model_hparams.create_hparams( hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false') hparams_overrides='load_pretrained=false')
model_fn = model.create_model_fn(detection_model_fn, configs, hparams) model_fn = model_lib.create_model_fn(detection_model_fn, configs, hparams)
estimator_spec = model_fn(features, None, tf.estimator.ModeKeys.PREDICT) estimator_spec = model_fn(features, None, tf.estimator.ModeKeys.PREDICT)
self.assertIsNone(estimator_spec.loss) self.assertIsNone(estimator_spec.loss)
...@@ -142,27 +158,146 @@ class ModelTflearnTest(tf.test.TestCase): ...@@ -142,27 +158,146 @@ class ModelTflearnTest(tf.test.TestCase):
self.assertIn(tf.saved_model.signature_constants.PREDICT_METHOD_NAME, self.assertIn(tf.saved_model.signature_constants.PREDICT_METHOD_NAME,
estimator_spec.export_outputs) estimator_spec.export_outputs)
def testModelFnInTrainMode(self): def test_model_fn_in_train_mode(self):
"""Tests the model function in TRAIN mode.""" """Tests the model function in TRAIN mode."""
configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.TRAIN) self._assert_model_fn_for_train_eval(configs, 'train')
def testModelFnInEvalMode(self): def test_model_fn_in_eval_mode(self):
"""Tests the model function in EVAL mode.""" """Tests the model function in EVAL mode."""
configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.EVAL) self._assert_model_fn_for_train_eval(configs, 'eval')
def testModelFnInPredictMode(self): def test_model_fn_in_eval_on_train_mode(self):
"""Tests the model function in PREDICT mode.""" """Tests the model function in EVAL mode with train data."""
configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
self._assert_outputs_for_predict(configs) self._assert_model_fn_for_train_eval(configs, 'eval_on_train')
def testExperiment(self): def test_model_fn_in_predict_mode(self):
"""Tests the model function in PREDICT mode."""
configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
self._assert_model_fn_for_predict(configs)
def test_create_estimator_and_inputs(self):
"""Tests that Estimator and input function are constructed correctly."""
run_config = tf.estimator.RunConfig()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20
eval_steps = 10
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config,
hparams,
pipeline_config_path,
train_steps=train_steps,
eval_steps=eval_steps)
estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tf.estimator.Estimator)
self.assertEqual(20, train_steps)
self.assertEqual(10, eval_steps)
self.assertIn('train_input_fn', train_and_eval_dict)
self.assertIn('eval_input_fn', train_and_eval_dict)
self.assertIn('eval_on_train_input_fn', train_and_eval_dict)
def test_create_estimator_with_default_train_eval_steps(self):
"""Tests that number of train/eval defaults to config values."""
run_config = tf.estimator.RunConfig()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
config_train_steps = configs['train_config'].num_steps
config_eval_steps = configs['eval_config'].num_examples
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config, hparams, pipeline_config_path)
estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tf.estimator.Estimator)
self.assertEqual(config_train_steps, train_steps)
self.assertEqual(config_eval_steps, eval_steps)
def test_create_tpu_estimator_and_inputs(self):
"""Tests that number of train/eval defaults to config values."""
run_config = tpu_config.RunConfig()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20
eval_steps = 10
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config,
hparams,
pipeline_config_path,
train_steps=train_steps,
eval_steps=eval_steps,
use_tpu_estimator=True)
estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tpu_estimator.TPUEstimator)
self.assertEqual(20, train_steps)
self.assertEqual(10, eval_steps)
def test_create_train_and_eval_specs(self):
"""Tests that `TrainSpec` and `EvalSpec` is created correctly."""
run_config = tf.estimator.RunConfig()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20
eval_steps = 10
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config,
hparams,
pipeline_config_path,
train_steps=train_steps,
eval_steps=eval_steps)
train_input_fn = train_and_eval_dict['train_input_fn']
eval_input_fn = train_and_eval_dict['eval_input_fn']
eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
predict_input_fn = train_and_eval_dict['predict_input_fn']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
train_spec, eval_specs = model_lib.create_train_and_eval_specs(
train_input_fn,
eval_input_fn,
eval_on_train_input_fn,
predict_input_fn,
train_steps,
eval_steps,
eval_on_train_data=True,
final_exporter_name='exporter',
eval_spec_name='holdout')
self.assertEqual(train_steps, train_spec.max_steps)
self.assertEqual(2, len(eval_specs))
self.assertEqual(eval_steps, eval_specs[0].steps)
self.assertEqual('holdout', eval_specs[0].name)
self.assertEqual('exporter', eval_specs[0].exporters[0].name)
self.assertEqual(eval_steps, eval_specs[1].steps)
self.assertEqual('eval_on_train', eval_specs[1].name)
def test_experiment(self):
"""Tests that the `Experiment` object is constructed correctly.""" """Tests that the `Experiment` object is constructed correctly."""
experiment = model_test_util.BuildExperiment() run_config = tf.estimator.RunConfig()
model_dir = experiment.estimator.model_dir hparams = model_hparams.create_hparams(
pipeline_config_path = os.path.join(model_dir, 'pipeline.config') hparams_overrides='load_pretrained=false')
self.assertTrue(tf.gfile.Exists(pipeline_config_path)) pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
experiment = model_lib.populate_experiment(
run_config,
hparams,
pipeline_config_path,
train_steps=10,
eval_steps=20)
self.assertEqual(10, experiment.train_steps)
self.assertEqual(20, experiment.eval_steps)
class UnbatchTensorsTest(tf.test.TestCase): class UnbatchTensorsTest(tf.test.TestCase):
...@@ -184,7 +319,7 @@ class UnbatchTensorsTest(tf.test.TestCase): ...@@ -184,7 +319,7 @@ class UnbatchTensorsTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_weights: fields.InputDataFields.groundtruth_weights:
groundtruth_weights_placeholder groundtruth_weights_placeholder
} }
unbatched_tensor_dict = model.unstack_batch( unbatched_tensor_dict = model_lib.unstack_batch(
tensor_dict, unpad_groundtruth_tensors=False) tensor_dict, unpad_groundtruth_tensors=False)
with self.test_session() as sess: with self.test_session() as sess:
...@@ -231,7 +366,7 @@ class UnbatchTensorsTest(tf.test.TestCase): ...@@ -231,7 +366,7 @@ class UnbatchTensorsTest(tf.test.TestCase):
fields.InputDataFields.num_groundtruth_boxes: fields.InputDataFields.num_groundtruth_boxes:
num_groundtruth_placeholder num_groundtruth_placeholder
} }
unbatched_tensor_dict = model.unstack_batch( unbatched_tensor_dict = model_lib.unstack_batch(
tensor_dict, unpad_groundtruth_tensors=True) tensor_dict, unpad_groundtruth_tensors=True)
with self.test_session() as sess: with self.test_session() as sess:
unbatched_tensor_dict_out = sess.run( unbatched_tensor_dict_out = sess.run(
......
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Binary to run train and evaluation on object detection model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl import flags
import tensorflow as tf
from object_detection import model_hparams
from object_detection import model_lib
# Command-line flags for the train-and-evaluate entry point below.
flags.DEFINE_string(
    'model_dir', None, 'Path to output model directory '
    'where event and checkpoint files will be written.')
flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
                    'file.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
# Help text previously said 'Number of train steps.' (copy-paste error).
flags.DEFINE_integer('num_eval_steps', None, 'Number of eval steps.')
flags.DEFINE_string(
    'hparams_overrides', None, 'Hyperparameter overrides, '
    'represented as a string containing comma-separated '
    'hparam_name=value pairs.')

FLAGS = flags.FLAGS
def main(unused_argv):
  """Builds the estimator from flags and runs train_and_evaluate."""
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)

  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      eval_steps=FLAGS.num_eval_steps)
  estimator = train_and_eval_dict['estimator']

  train_spec, eval_specs = model_lib.create_train_and_eval_specs(
      train_and_eval_dict['train_input_fn'],
      train_and_eval_dict['eval_input_fn'],
      train_and_eval_dict['eval_on_train_input_fn'],
      train_and_eval_dict['predict_input_fn'],
      train_and_eval_dict['train_steps'],
      train_and_eval_dict['eval_steps'],
      eval_on_train_data=False)

  # Currently only a single Eval Spec is allowed.
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])


if __name__ == '__main__':
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utils for tests for object detection tflearn model."""
from __future__ import absolute_import
import os
import tempfile
import tensorflow as tf
from object_detection import model
from object_detection import model_hparams
FLAGS = tf.flags.FLAGS
FASTER_RCNN_MODEL_NAME = 'faster_rcnn_resnet50_pets'
SSD_INCEPTION_MODEL_NAME = 'ssd_inception_v2_pets'
def GetPipelineConfigPath(model_name):
  """Returns path to the local pipeline config file."""
  config_filename = model_name + '.config'
  base_dir = tf.resource_loader.get_data_files_path()
  return os.path.join(base_dir, 'samples', 'configs', config_filename)
def InitializeFlags(model_name_for_test):
  """Points test flags at the model's config and a scratch model dir."""
  FLAGS.pipeline_config_path = GetPipelineConfigPath(model_name_for_test)
  # mkdtemp gives each test run its own isolated model directory.
  FLAGS.model_dir = tempfile.mkdtemp()
def BuildExperiment():
  """Builds an Experiment object for testing purposes."""
  # pylint: disable=protected-access
  experiment_fn = model.build_experiment_fn(10, 10)
  # pylint: enable=protected-access
  hparams = model_hparams.create_hparams(
      hparams_overrides='load_pretrained=false')
  return experiment_fn(tf.contrib.learn.RunConfig(), hparams)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs `Estimator` for object detection model on TPUs.
This uses the TPUEstimator API to define and run a model in TRAIN/EVAL modes.
"""
# pylint: enable=line-too-long
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu import tpu_config
from tensorflow.contrib.tpu.python.tpu import tpu_estimator
from tensorflow.contrib.training.python.training import evaluation
from object_detection import inputs
from object_detection import model
from object_detection import model_hparams
from object_detection.builders import model_builder
from object_detection.utils import config_util
# Command-line flags for the TPU train/eval binary; read via FLAGS below.
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')

# Cloud TPU Cluster Resolvers
tf.flags.DEFINE_string(
    'gcp_project',
    default=None,
    help='Project name for the Cloud TPU-enabled project. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
    'tpu_zone',
    default=None,
    help='GCE zone where the Cloud TPU is located in. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
    'tpu_name',
    default=None,
    help='Name of the Cloud TPU for Cluster Resolvers. You must specify either '
    'this flag or --master.')
tf.flags.DEFINE_string(
    'master', default=None,
    help='GRPC URL of the master (e.g. grpc://ip.address.of.tpu:8470). You '
    'must specify either this flag or --tpu_name.')

# TPU topology / loop configuration.
tf.flags.DEFINE_integer('num_shards', 8, 'Number of shards (TPU cores).')
tf.flags.DEFINE_integer('iterations_per_loop', 100,
                        'Number of iterations per TPU training loop.')
# For mode=train_and_eval, evaluation occurs after training is finished.
# Note: independently of steps_per_checkpoint, estimator will save the most
# recent checkpoint every 10 minutes by default for train_and_eval
tf.flags.DEFINE_string('mode', 'train_and_eval',
                       'Mode to run: train, eval, train_and_eval')
# 32 * 8: presumably per-core batch of 32 across 8 shards — TODO confirm.
tf.flags.DEFINE_integer('train_batch_size', 32 * 8, 'Batch size for training.')

# For EVAL.
tf.flags.DEFINE_integer('min_eval_interval_secs', 180,
                        'Minimum seconds between evaluations.')
tf.flags.DEFINE_integer(
    'eval_timeout_secs', None,
    'Maximum seconds between checkpoints before evaluation terminates.')
tf.flags.DEFINE_string('hparams_overrides', None, 'Comma-separated list of '
                       'hyperparameters to override defaults.')
tf.flags.DEFINE_boolean('eval_training_data', False,
                        'If training data should be evaluated for this job.')

FLAGS = tf.flags.FLAGS
def create_estimator(run_config,
                     hparams,
                     pipeline_config_path,
                     train_steps=None,
                     eval_steps=None,
                     train_batch_size=None,
                     model_fn_creator=model.create_model_fn,
                     use_tpu=False,
                     num_shards=1,
                     params=None,
                     **kwargs):
  """Creates a `TPUEstimator` object along with input functions and step counts.

  Args:
    run_config: A `RunConfig`.
    hparams: A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    train_steps: Number of training steps. If None, the number of training steps
      is set from the `TrainConfig` proto.
    eval_steps: Number of evaluation steps per evaluation cycle. If None, the
      number of evaluation steps is set from the `EvalConfig` proto.
    train_batch_size: Training batch size. If none, use batch size from
      `TrainConfig` proto.
    model_fn_creator: A function that creates a `model_fn` for `Estimator`.
      Follows the signature:
      * Args:
        * `detection_model_fn`: Function that returns `DetectionModel` instance.
        * `configs`: Dictionary of pipeline config objects.
        * `hparams`: `HParams` object.
      * Returns:
        `model_fn` for `Estimator`.
    use_tpu: Boolean, whether training and evaluation should run on TPU.
    num_shards: Number of shards (TPU cores).
    params: Parameter dictionary passed from the estimator.
    **kwargs: Additional keyword arguments for configuration override.

  Returns:
    Estimator: A estimator object used for training and evaluation
    train_input_fn: Input function for the training loop
    eval_validation_input_fn: Input function to run for evaluation on
      validation data.
    eval_training_input_fn: Input function to run for evaluation on
      training data.
    train_steps: Number of training steps either from arg `train_steps` or
      `TrainConfig` proto
    eval_steps: Number of evaluation steps either from arg `eval_steps` or
      `EvalConfig` proto
  """
  # Load the pipeline config and fold in hparams/kwargs overrides.
  configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
  configs = config_util.merge_external_params_with_configs(
      configs,
      hparams,
      train_steps=train_steps,
      eval_steps=eval_steps,
      batch_size=train_batch_size,
      **kwargs)
  model_config = configs['model']
  train_config = configs['train_config']
  train_input_config = configs['train_input_config']
  eval_config = configs['eval_config']
  eval_input_config = configs['eval_input_config']
  # NOTE(review): reads the global FLAGS.eval_training_data inside a library
  # function, coupling it to this binary's flags — consider a parameter.
  if FLAGS.eval_training_data:
    eval_input_config = configs['train_input_config']

  if params is None:
    params = {}

  # Fall back to step counts from the (merged) pipeline config when the
  # caller did not supply explicit values.
  if train_steps is None and train_config.num_steps:
    train_steps = train_config.num_steps

  if eval_steps is None and eval_config.num_examples:
    eval_steps = eval_config.num_examples

  detection_model_fn = functools.partial(
      model_builder.build, model_config=model_config)

  # Create the input functions for TRAIN/EVAL.
  train_input_fn = inputs.create_train_input_fn(
      train_config=train_config,
      train_input_config=train_input_config,
      model_config=model_config)
  eval_validation_input_fn = inputs.create_eval_input_fn(
      eval_config=eval_config,
      eval_input_config=eval_input_config,
      model_config=model_config)
  eval_training_input_fn = inputs.create_eval_input_fn(
      eval_config=eval_config,
      eval_input_config=train_input_config,
      model_config=model_config)

  estimator = tpu_estimator.TPUEstimator(
      model_fn=model_fn_creator(detection_model_fn, configs, hparams,
                                use_tpu),
      train_batch_size=train_config.batch_size,
      # For each core, only batch size 1 is supported for eval.
      eval_batch_size=num_shards * 1 if use_tpu else 1,
      use_tpu=use_tpu,
      config=run_config,
      params=params)
  return (estimator, train_input_fn, eval_validation_input_fn,
          eval_training_input_fn, train_steps, eval_steps)
def main(unused_argv):
  """Builds a TPU estimator from flags and runs train and/or eval.

  The flow is selected by FLAGS.mode:
    * 'train': train up to `train_steps`.
    * 'train_and_eval': train, then evaluate once on validation data.
    * 'eval': continuously evaluate each new checkpoint written to
      FLAGS.model_dir until the final training step is reached or the
      checkpoint iterator times out.

  Args:
    unused_argv: unused positional command-line arguments (required by the
      tf.app.run entry-point convention).

  Raises:
    RuntimeError: if neither --master nor --tpu_name is provided.
  """
  tf.flags.mark_flag_as_required('model_dir')
  tf.flags.mark_flag_as_required('pipeline_config_path')
  # Resolve the TPU master gRPC address: an explicit --master takes
  # precedence over resolving --tpu_name via the cluster resolver.
  if FLAGS.master is None and FLAGS.tpu_name is None:
    raise RuntimeError('You must specify either --master or --tpu_name.')
  if FLAGS.master is not None:
    if FLAGS.tpu_name is not None:
      tf.logging.warn('Both --master and --tpu_name are set. Ignoring '
                      '--tpu_name and using --master.')
    tpu_grpc_url = FLAGS.master
  else:
    tpu_cluster_resolver = (
        tf.contrib.cluster_resolver.python.training.TPUClusterResolver(
            tpu_names=[FLAGS.tpu_name],
            zone=FLAGS.tpu_zone,
            project=FLAGS.gcp_project))
    tpu_grpc_url = tpu_cluster_resolver.get_master()
  # The same address serves both training and evaluation masters.
  config = tpu_config.RunConfig(
      master=tpu_grpc_url,
      evaluation_master=tpu_grpc_url,
      model_dir=FLAGS.model_dir,
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))
  # Extra params dict passed through to the estimator's model_fn; populated
  # (if at all) inside create_estimator.
  params = {}
  (estimator, train_input_fn, eval_validation_input_fn, eval_training_input_fn,
   train_steps, eval_steps) = (
       create_estimator(
           config,
           model_hparams.create_hparams(
               hparams_overrides=FLAGS.hparams_overrides),
           FLAGS.pipeline_config_path,
           train_steps=FLAGS.num_train_steps,
           eval_steps=FLAGS.num_eval_steps,
           train_batch_size=FLAGS.train_batch_size,
           use_tpu=FLAGS.use_tpu,
           num_shards=FLAGS.num_shards,
           params=params))
  if FLAGS.mode in ['train', 'train_and_eval']:
    estimator.train(input_fn=train_input_fn, max_steps=train_steps)
  if FLAGS.mode == 'train_and_eval':
    # Eval one time.
    eval_results = estimator.evaluate(
        input_fn=eval_validation_input_fn, steps=eval_steps)
    tf.logging.info('Eval results: %s' % eval_results)
  # Continuously evaluating.
  if FLAGS.mode == 'eval':
    def terminate_eval():
      # timeout_fn for checkpoints_iterator: returning True stops iteration
      # after FLAGS.eval_timeout_secs without a new checkpoint.
      tf.logging.info('Terminating eval after %d seconds of no checkpoints' %
                      FLAGS.eval_timeout_secs)
      return True
    # Run evaluation when there's a new checkpoint.
    for ckpt in evaluation.checkpoints_iterator(
        FLAGS.model_dir,
        min_interval_secs=FLAGS.min_eval_interval_secs,
        timeout=FLAGS.eval_timeout_secs,
        timeout_fn=terminate_eval):
      tf.logging.info('Starting to evaluate.')
      # Choose which split to evaluate; `name` namespaces the eval metrics.
      if FLAGS.eval_training_data:
        name = 'training_data'
        input_fn = eval_training_input_fn
      else:
        name = 'validation_data'
        input_fn = eval_validation_input_fn
      try:
        eval_results = estimator.evaluate(
            input_fn=input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt,
            name=name)
        tf.logging.info('Eval results: %s' % eval_results)
        # Terminate eval job when final checkpoint is reached
        # NOTE(review): assumes checkpoint basenames look like
        # '<prefix>-<global_step>'; confirm against checkpoint naming.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= train_steps:
          tf.logging.info(
              'Evaluation finished after training step %d' % current_step)
          break
      except tf.errors.NotFoundError:
        # The checkpoint may have been garbage-collected between discovery
        # and evaluation; skip it and wait for the next one.
        tf.logging.info(
            'Checkpoint %s no longer exists, skipping checkpoint' % ckpt)
if __name__ == '__main__':
  # tf.app.run parses command-line flags and then dispatches to main().
  tf.app.run()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs `Estimator` for object detection model on TPUs.
This uses the TPUEstimator API to define and run a model in TRAIN/EVAL modes.
"""
# pylint: enable=line-too-long
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl import flags
import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu import tpu_config
from object_detection import model_hparams
from object_detection import model_lib
# Command-line flags configuring the TPU training/evaluation run.
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')

# Cloud TPU Cluster Resolvers
flags.DEFINE_string(
    'gcp_project',
    default=None,
    help='Project name for the Cloud TPU-enabled project. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
flags.DEFINE_string(
    'tpu_zone',
    default=None,
    # Fixed copy-paste in help text: this flag controls the zone, not the
    # project.
    help='GCE zone where the Cloud TPU is located in. If not specified, we '
    'will attempt to automatically detect the GCE zone from metadata.')
flags.DEFINE_string(
    'tpu_name',
    default=None,
    help='Name of the Cloud TPU for Cluster Resolvers.')

flags.DEFINE_integer('num_shards', 8, 'Number of shards (TPU cores).')
flags.DEFINE_integer('iterations_per_loop', 100,
                     'Number of iterations per TPU training loop.')
# For mode=train_and_eval, evaluation occurs after training is finished.
# Note: independently of steps_per_checkpoint, estimator will save the most
# recent checkpoint every 10 minutes by default for train_and_eval
flags.DEFINE_string('mode', 'train',
                    'Mode to run: train, eval')
flags.DEFINE_integer('train_batch_size', 32 * 8, 'Batch size for training.')
flags.DEFINE_string(
    'hparams_overrides', None, 'Comma-separated list of '
    'hyperparameters to override defaults.')
flags.DEFINE_boolean('eval_training_data', False,
                     'If training data should be evaluated for this job.')
flags.DEFINE_string(
    'model_dir', None, 'Path to output model directory '
    'where event and checkpoint files will be written.')
flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
                    'file.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
# Fixed copy-paste in help text: this flag controls eval steps, not train
# steps.
flags.DEFINE_integer('num_eval_steps', None, 'Number of eval steps.')

FLAGS = tf.flags.FLAGS
def main(unused_argv):
  """Creates a TPU estimator via model_lib and runs training or eval.

  FLAGS.mode selects the flow:
    * 'train': train up to `train_steps`.
    * 'eval': continuously evaluate checkpoints written to FLAGS.model_dir
      (on training or validation data per FLAGS.eval_training_data).

  Args:
    unused_argv: unused positional command-line arguments (required by the
      tf.app.run entry-point convention).
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  # Resolve the TPU master gRPC address from the --tpu_name flag.
  tpu_cluster_resolver = (
      tf.contrib.cluster_resolver.python.training.TPUClusterResolver(
          tpu_names=[FLAGS.tpu_name],
          zone=FLAGS.tpu_zone,
          project=FLAGS.gcp_project))
  tpu_grpc_url = tpu_cluster_resolver.get_master()
  # The same address serves both training and evaluation masters.
  config = tpu_config.RunConfig(
      master=tpu_grpc_url,
      evaluation_master=tpu_grpc_url,
      model_dir=FLAGS.model_dir,
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))
  # model_lib wires up the estimator plus the train/eval input pipelines
  # from the pipeline config; use_tpu_estimator selects TPUEstimator.
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      eval_steps=FLAGS.num_eval_steps,
      use_tpu_estimator=True,
      use_tpu=FLAGS.use_tpu,
      num_shards=FLAGS.num_shards,
      batch_size=FLAGS.train_batch_size)
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fn = train_and_eval_dict['eval_input_fn']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  eval_steps = train_and_eval_dict['eval_steps']
  if FLAGS.mode == 'train':
    estimator.train(input_fn=train_input_fn, max_steps=train_steps)
  # Continuously evaluating.
  if FLAGS.mode == 'eval':
    # Choose which split to evaluate; `name` namespaces the eval metrics.
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      input_fn = eval_input_fn
    # Blocks, evaluating each new checkpoint until training completes.
    model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn, eval_steps,
                              train_steps, name)
if __name__ == '__main__':
  # tf.app.run parses command-line flags and then dispatches to main().
  tf.app.run()
...@@ -17,16 +17,16 @@ ...@@ -17,16 +17,16 @@
import tensorflow as tf import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.models import ssd_mobilenet_v1_feature_extractor from object_detection.utils import context_manager
from object_detection.utils import ops from object_detection.utils import ops
from nets import mobilenet_v1 from nets import mobilenet_v1
slim = tf.contrib.slim slim = tf.contrib.slim
class EmbeddedSSDMobileNetV1FeatureExtractor( class EmbeddedSSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor):
"""Embedded-friendly SSD Feature Extractor using MobilenetV1 features. """Embedded-friendly SSD Feature Extractor using MobilenetV1 features.
This feature extractor is similar to SSD MobileNetV1 feature extractor, and This feature extractor is similar to SSD MobileNetV1 feature extractor, and
...@@ -49,12 +49,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -49,12 +49,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models. """MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args: Args:
...@@ -63,20 +62,16 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -63,20 +62,16 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. For EmbeddedSSD it must be set to 1. width dimensions to. For EmbeddedSSD it must be set to 1.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
Raises: Raises:
ValueError: upon invalid `pad_to_multiple` values. ValueError: upon invalid `pad_to_multiple` values.
...@@ -87,10 +82,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -87,10 +82,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__( super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
use_explicit_padding, use_depthwise, inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
def _extract_features(self, preprocessed_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -130,16 +140,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -130,16 +140,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
'use_depthwise': self._use_depthwise, 'use_depthwise': self._use_depthwise,
} }
with slim.arg_scope(self._conv_hyperparams): with tf.variable_scope('MobilenetV1',
with slim.arg_scope([slim.batch_norm], fused=False): reuse=self._reuse_weights) as scope:
with tf.variable_scope('MobilenetV1', with slim.arg_scope(
reuse=self._reuse_weights) as scope: mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
_, image_features = mobilenet_v1.mobilenet_v1_base( with (slim.arg_scope(self._conv_hyperparams_fn())
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), if self._override_base_feature_extractor_hyperparams
final_endpoint='Conv2d_13_pointwise', else context_manager.IdentityContextManager()):
min_depth=self._min_depth, # TODO(skligys): Enable fused batch norm once quantization supports it.
depth_multiplier=self._depth_multiplier, with slim.arg_scope([slim.batch_norm], fused=False):
scope=scope) _, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout, feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
......
...@@ -25,7 +25,7 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest( ...@@ -25,7 +25,7 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True):
"""Constructs a new feature extractor. """Constructs a new feature extractor.
Args: Args:
...@@ -33,18 +33,16 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest( ...@@ -33,18 +33,16 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
batch_norm_trainable: whether to update batch norm parameters during
training.
Returns: Returns:
an ssd_meta_arch.SSDFeatureExtractor object. an ssd_meta_arch.SSDFeatureExtractor object.
""" """
min_depth = 32 min_depth = 32
conv_hyperparams = {}
return (embedded_ssd_mobilenet_v1_feature_extractor. return (embedded_ssd_mobilenet_v1_feature_extractor.
EmbeddedSSDMobileNetV1FeatureExtractor( EmbeddedSSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable)) self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True))
def test_extract_features_returns_correct_shapes_256(self): def test_extract_features_returns_correct_shapes_256(self):
image_height = 256 image_height = 256
......
...@@ -17,11 +17,30 @@ ...@@ -17,11 +17,30 @@
import tensorflow as tf import tensorflow as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.utils import shape_utils
from nets import mobilenet_v1 from nets import mobilenet_v1
slim = tf.contrib.slim slim = tf.contrib.slim
_MOBILENET_V1_100_CONV_NO_LAST_STRIDE_DEFS = [
mobilenet_v1.Conv(kernel=[3, 3], stride=2, depth=32),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=64),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=128),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=128),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=256),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=256),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=1024),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=1024)
]
class FasterRCNNMobilenetV1FeatureExtractor( class FasterRCNNMobilenetV1FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
"""Faster R-CNN Mobilenet V1 feature extractor implementation.""" """Faster R-CNN Mobilenet V1 feature extractor implementation."""
...@@ -33,7 +52,8 @@ class FasterRCNNMobilenetV1FeatureExtractor( ...@@ -33,7 +52,8 @@ class FasterRCNNMobilenetV1FeatureExtractor(
reuse_weights=None, reuse_weights=None,
weight_decay=0.0, weight_decay=0.0,
depth_multiplier=1.0, depth_multiplier=1.0,
min_depth=16): min_depth=16,
skip_last_stride=False):
"""Constructor. """Constructor.
Args: Args:
...@@ -44,6 +64,7 @@ class FasterRCNNMobilenetV1FeatureExtractor( ...@@ -44,6 +64,7 @@ class FasterRCNNMobilenetV1FeatureExtractor(
weight_decay: See base class. weight_decay: See base class.
depth_multiplier: float depth multiplier for feature extractor. depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
skip_last_stride: Skip the last stride if True.
Raises: Raises:
ValueError: If `first_stage_features_stride` is not 8 or 16. ValueError: If `first_stage_features_stride` is not 8 or 16.
...@@ -52,6 +73,7 @@ class FasterRCNNMobilenetV1FeatureExtractor( ...@@ -52,6 +73,7 @@ class FasterRCNNMobilenetV1FeatureExtractor(
raise ValueError('`first_stage_features_stride` must be 8 or 16.') raise ValueError('`first_stage_features_stride` must be 8 or 16.')
self._depth_multiplier = depth_multiplier self._depth_multiplier = depth_multiplier
self._min_depth = min_depth self._min_depth = min_depth
self._skip_last_stride = skip_last_stride
super(FasterRCNNMobilenetV1FeatureExtractor, self).__init__( super(FasterRCNNMobilenetV1FeatureExtractor, self).__init__(
is_training, first_stage_features_stride, batch_norm_trainable, is_training, first_stage_features_stride, batch_norm_trainable,
reuse_weights, weight_decay) reuse_weights, weight_decay)
...@@ -91,24 +113,25 @@ class FasterRCNNMobilenetV1FeatureExtractor( ...@@ -91,24 +113,25 @@ class FasterRCNNMobilenetV1FeatureExtractor(
""" """
preprocessed_inputs.get_shape().assert_has_rank(4) preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert( preprocessed_inputs = shape_utils.check_min_image_dim(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), min_dim=33, image_tensor=preprocessed_inputs)
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.']) with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(
with tf.control_dependencies([shape_assert]): is_training=self._train_batch_norm,
with slim.arg_scope( weight_decay=self._weight_decay)):
mobilenet_v1.mobilenet_v1_arg_scope( with tf.variable_scope('MobilenetV1',
is_training=self._train_batch_norm, reuse=self._reuse_weights) as scope:
weight_decay=self._weight_decay)): params = {}
with tf.variable_scope('MobilenetV1', if self._skip_last_stride:
reuse=self._reuse_weights) as scope: params['conv_defs'] = _MOBILENET_V1_100_CONV_NO_LAST_STRIDE_DEFS
_, activations = mobilenet_v1.mobilenet_v1_base( _, activations = mobilenet_v1.mobilenet_v1_base(
preprocessed_inputs, preprocessed_inputs,
final_endpoint='Conv2d_11_pointwise', final_endpoint='Conv2d_11_pointwise',
min_depth=self._min_depth, min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
scope=scope) scope=scope,
**params)
return activations['Conv2d_11_pointwise'], activations return activations['Conv2d_11_pointwise'], activations
def _extract_box_classifier_features(self, proposal_feature_maps, scope): def _extract_box_classifier_features(self, proposal_feature_maps, scope):
......
...@@ -26,6 +26,10 @@ from object_detection.utils import test_case ...@@ -26,6 +26,10 @@ from object_detection.utils import test_case
class SsdFeatureExtractorTestBase(test_case.TestCase): class SsdFeatureExtractorTestBase(test_case.TestCase):
def conv_hyperparams_fn(self):
with tf.contrib.slim.arg_scope([]) as sc:
return sc
@abstractmethod @abstractmethod
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False): use_explicit_padding=False):
......
...@@ -33,12 +33,11 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -33,12 +33,11 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""InceptionV2 Feature Extractor for SSD Models. """InceptionV2 Feature Extractor for SSD Models.
Args: Args:
...@@ -47,25 +46,30 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -47,25 +46,30 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters. Raises:
ValueError: If `override_base_feature_extractor_hyperparams` is False.
""" """
super(SSDInceptionV2FeatureExtractor, self).__init__( super(SSDInceptionV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
use_explicit_padding, use_depthwise, inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
if not self._override_base_feature_extractor_hyperparams:
raise ValueError('SSD Inception V2 feature extractor always uses'
'scope returned by `conv_hyperparams_fn` for both the '
'base feature extractor and the additional layers '
'added since there is no arg_scope defined for the base '
'feature extractor.')
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -82,7 +86,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -82,7 +86,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -103,7 +107,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -103,7 +107,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
'use_depthwise': self._use_depthwise, 'use_depthwise': self._use_depthwise,
} }
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
with tf.variable_scope('InceptionV2', with tf.variable_scope('InceptionV2',
reuse=self._reuse_weights) as scope: reuse=self._reuse_weights) as scope:
_, image_features = inception_v2.inception_v2_base( _, image_features = inception_v2.inception_v2_base(
......
...@@ -25,7 +25,7 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -25,7 +25,7 @@ class SsdInceptionV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True):
"""Constructs a SsdInceptionV2FeatureExtractor. """Constructs a SsdInceptionV2FeatureExtractor.
Args: Args:
...@@ -33,16 +33,15 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -33,16 +33,15 @@ class SsdInceptionV2FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not
Returns: Returns:
an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor. an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor.
""" """
min_depth = 32 min_depth = 32
conv_hyperparams = {}
return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor( return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable) self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
image_height = 128 image_height = 128
......
...@@ -33,12 +33,11 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -33,12 +33,11 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""InceptionV3 Feature Extractor for SSD Models. """InceptionV3 Feature Extractor for SSD Models.
Args: Args:
...@@ -47,25 +46,31 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -47,25 +46,31 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters. Raises:
ValueError: If `override_base_feature_extractor_hyperparams` is False.
""" """
super(SSDInceptionV3FeatureExtractor, self).__init__( super(SSDInceptionV3FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
use_explicit_padding, use_depthwise, inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
if not self._override_base_feature_extractor_hyperparams:
raise ValueError('SSD Inception V3 feature extractor always uses'
'scope returned by `conv_hyperparams_fn` for both the '
'base feature extractor and the additional layers '
'added since there is no arg_scope defined for the base '
'feature extractor.')
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -82,7 +87,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -82,7 +87,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -103,7 +108,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -103,7 +108,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
'use_depthwise': self._use_depthwise, 'use_depthwise': self._use_depthwise,
} }
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope: with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope:
_, image_features = inception_v3.inception_v3_base( _, image_features = inception_v3.inception_v3_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
......
...@@ -25,7 +25,7 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -25,7 +25,7 @@ class SsdInceptionV3FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True):
"""Constructs a SsdInceptionV3FeatureExtractor. """Constructs a SsdInceptionV3FeatureExtractor.
Args: Args:
...@@ -33,16 +33,15 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -33,16 +33,15 @@ class SsdInceptionV3FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not
Returns: Returns:
an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor. an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor.
""" """
min_depth = 32 min_depth = 32
conv_hyperparams = {}
return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor( return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable) self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
image_height = 128 image_height = 128
......
...@@ -19,6 +19,7 @@ import tensorflow as tf ...@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils from object_detection.utils import shape_utils
from nets import mobilenet_v1 from nets import mobilenet_v1
...@@ -34,12 +35,11 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -34,12 +35,11 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""MobileNetV1 Feature Extractor for SSD Models. """MobileNetV1 Feature Extractor for SSD Models.
Args: Args:
...@@ -48,26 +48,22 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -48,26 +48,22 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding inputs so that the output dimensions are the same as if 'SAME' padding
were used. were used.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDMobileNetV1FeatureExtractor, self).__init__( super(SSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
use_explicit_padding, use_depthwise, inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -84,7 +80,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -84,7 +80,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -110,17 +106,20 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -110,17 +106,20 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
reuse=self._reuse_weights) as scope: reuse=self._reuse_weights) as scope:
with slim.arg_scope( with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope( mobilenet_v1.mobilenet_v1_arg_scope(
is_training=(self._batch_norm_trainable and self._is_training))): is_training=None, regularize_depthwise=True)):
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams
else context_manager.IdentityContextManager()):
# TODO(skligys): Enable fused batch norm once quantization supports it. # TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False): with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v1.mobilenet_v1_base( _, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise', final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth, min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding, use_explicit_padding=self._use_explicit_padding,
scope=scope) scope=scope)
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(skligys): Enable fused batch norm once quantization supports it. # TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False): with slim.arg_scope([slim.batch_norm], fused=False):
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_maps = feature_map_generators.multi_resolution_feature_maps(
......
...@@ -27,8 +27,7 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -27,8 +27,7 @@ class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True, is_training=True, use_explicit_padding=False):
use_explicit_padding=False):
"""Constructs a new feature extractor. """Constructs a new feature extractor.
Args: Args:
...@@ -36,8 +35,6 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -36,8 +35,6 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding inputs so that the output dimensions are the same as if 'SAME' padding
were used. were used.
...@@ -45,11 +42,9 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -45,11 +42,9 @@ class SsdMobilenetV1FeatureExtractorTest(
an ssd_meta_arch.SSDFeatureExtractor object. an ssd_meta_arch.SSDFeatureExtractor object.
""" """
min_depth = 32 min_depth = 32
with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc:
conv_hyperparams = sc
return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor( return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable=batch_norm_trainable, self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding) use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
......
...@@ -19,6 +19,7 @@ import tensorflow as tf ...@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils from object_detection.utils import shape_utils
from nets.mobilenet import mobilenet from nets.mobilenet import mobilenet
...@@ -35,12 +36,11 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -35,12 +36,11 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""MobileNetV2 Feature Extractor for SSD Models. """MobileNetV2 Feature Extractor for SSD Models.
Mobilenet v2 (experimental), designed by sandler@. More details can be found Mobilenet v2 (experimental), designed by sandler@. More details can be found
...@@ -52,25 +52,21 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -52,25 +52,21 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDMobileNetV2FeatureExtractor, self).__init__( super(SSDMobileNetV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
use_explicit_padding, use_depthwise, inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -87,7 +83,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -87,7 +83,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -110,21 +106,22 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -110,21 +106,22 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope: with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
with slim.arg_scope( with slim.arg_scope(
mobilenet_v2.training_scope( mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
is_training=(self._is_training and self._batch_norm_trainable),
bn_decay=0.9997)), \
slim.arg_scope( slim.arg_scope(
[mobilenet.depth_multiplier], min_depth=self._min_depth): [mobilenet.depth_multiplier], min_depth=self._min_depth):
# TODO(b/68150321): Enable fused batch norm once quantization with (slim.arg_scope(self._conv_hyperparams_fn())
# supports it. if self._override_base_feature_extractor_hyperparams else
with slim.arg_scope([slim.batch_norm], fused=False): context_manager.IdentityContextManager()):
_, image_features = mobilenet_v2.mobilenet_base( # TODO(b/68150321): Enable fused batch norm once quantization
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), # supports it.
final_endpoint='layer_19', with slim.arg_scope([slim.batch_norm], fused=False):
depth_multiplier=self._depth_multiplier, _, image_features = mobilenet_v2.mobilenet_base(
use_explicit_padding=self._use_explicit_padding, ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
scope=scope) final_endpoint='layer_19',
with slim.arg_scope(self._conv_hyperparams): depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(b/68150321): Enable fused batch norm once quantization # TODO(b/68150321): Enable fused batch norm once quantization
# supports it. # supports it.
with slim.arg_scope([slim.batch_norm], fused=False): with slim.arg_scope([slim.batch_norm], fused=False):
......
...@@ -41,14 +41,12 @@ class SsdMobilenetV2FeatureExtractorTest( ...@@ -41,14 +41,12 @@ class SsdMobilenetV2FeatureExtractorTest(
an ssd_meta_arch.SSDFeatureExtractor object. an ssd_meta_arch.SSDFeatureExtractor object.
""" """
min_depth = 32 min_depth = 32
with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc:
conv_hyperparams = sc
return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor( return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor(
False, False,
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding) use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment