Unverified Commit fd7b6887 authored by Jonathan Huang's avatar Jonathan Huang Committed by GitHub
Browse files

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection 
parents f98ec55e 1efe98bb
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs `Experiment` for object detection model.
This uses the TF.learn framework to define and run an object detection model
wrapped in an `Estimator`.
Note that this module is only compatible with SSD Meta architecture at the
moment.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
import tensorflow as tf
from google.protobuf import text_format
from tensorflow.contrib.learn.python.learn import learn_runner
from tensorflow.contrib.tpu.python.tpu import tpu_optimizer
from object_detection import eval_util
from object_detection import inputs
from object_detection import model_hparams
from object_detection.builders import model_builder
from object_detection.builders import optimizer_builder
from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.utils import config_util
from object_detection.utils import label_map_util
from object_detection.utils import shape_utils
from object_detection.utils import variables_helper
from object_detection.utils import visualization_utils as vis_utils
tf.flags.DEFINE_string('model_dir', None, 'Path to output model directory '
'where event and checkpoint files will be written.')
tf.flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
'file.')
tf.flags.DEFINE_integer('num_train_steps', 500000, 'Number of train steps.')
tf.flags.DEFINE_integer('num_eval_steps', 10000, 'Number of train steps.')
FLAGS = tf.flags.FLAGS
def _get_groundtruth_data(detection_model, class_agnostic):
"""Extracts groundtruth data from detection_model.
Args:
detection_model: A `DetectionModel` object.
class_agnostic: Whether the detections are class_agnostic.
Returns:
A tuple of:
groundtruth: Dictionary with the following fields:
'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
normalized coordinates.
'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
'groundtruth_masks': 3D float32 tensor of instance masks (if provided in
groundtruth)
class_agnostic: Boolean indicating whether detections are class agnostic.
"""
input_data_fields = fields.InputDataFields()
groundtruth_boxes = detection_model.groundtruth_lists(
fields.BoxListFields.boxes)[0]
# For class-agnostic models, groundtruth one-hot encodings collapse to all
# ones.
if class_agnostic:
groundtruth_boxes_shape = tf.shape(groundtruth_boxes)
groundtruth_classes_one_hot = tf.ones([groundtruth_boxes_shape[0], 1])
else:
groundtruth_classes_one_hot = detection_model.groundtruth_lists(
fields.BoxListFields.classes)[0]
label_id_offset = 1 # Applying label id offset (b/63711816)
groundtruth_classes = (
tf.argmax(groundtruth_classes_one_hot, axis=1) + label_id_offset)
groundtruth = {
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes
}
if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
groundtruth[input_data_fields.groundtruth_instance_masks] = (
detection_model.groundtruth_lists(fields.BoxListFields.masks)[0])
return groundtruth
def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
"""Unstacks all tensors in `tensor_dict` along 0th dimension.
Unstacks tensor from the tensor dict along 0th dimension and returns a
tensor_dict containing values that are lists of unstacked tensors.
Tensors in the `tensor_dict` are expected to be of one of the three shapes:
1. [batch_size]
2. [batch_size, height, width, channels]
3. [batch_size, num_boxes, d1, d2, ... dn]
When unpad_tensors is set to true, unstacked tensors of form 3 above are
sliced along the `num_boxes` dimension using the value in tensor
field.InputDataFields.num_groundtruth_boxes.
Note that this function has a static list of input data fields and has to be
kept in sync with the InputDataFields defined in core/standard_fields.py
Args:
tensor_dict: A dictionary of batched groundtruth tensors.
unpad_groundtruth_tensors: Whether to remove padding along `num_boxes`
dimension of the groundtruth tensors.
Returns:
A dictionary where the keys are from fields.InputDataFields and values are
a list of unstacked (optionally unpadded) tensors.
Raises:
ValueError: If unpad_tensors is True and `tensor_dict` does not contain
`num_groundtruth_boxes` tensor.
"""
unbatched_tensor_dict = {key: tf.unstack(tensor)
for key, tensor in tensor_dict.items()}
if unpad_groundtruth_tensors:
if (fields.InputDataFields.num_groundtruth_boxes not in
unbatched_tensor_dict):
raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. '
'Keys available: {}'.format(
unbatched_tensor_dict.keys()))
unbatched_unpadded_tensor_dict = {}
unpad_keys = set([
# List of input data fields that are padded along the num_boxes
# dimension. This list has to be kept in sync with InputDataFields in
# standard_fields.py.
fields.InputDataFields.groundtruth_instance_masks,
fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_group_of,
fields.InputDataFields.groundtruth_difficult,
fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_area,
fields.InputDataFields.groundtruth_weights
]).intersection(set(unbatched_tensor_dict.keys()))
for key in unpad_keys:
unpadded_tensor_list = []
for num_gt, padded_tensor in zip(
unbatched_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
unbatched_tensor_dict[key]):
tensor_shape = shape_utils.combined_static_and_dynamic_shape(
padded_tensor)
slice_begin = tf.zeros([len(tensor_shape)], dtype=tf.int32)
slice_size = tf.stack(
[num_gt] + [-1 if dim is None else dim for dim in tensor_shape[1:]])
unpadded_tensor = tf.slice(padded_tensor, slice_begin, slice_size)
unpadded_tensor_list.append(unpadded_tensor)
unbatched_unpadded_tensor_dict[key] = unpadded_tensor_list
unbatched_tensor_dict.update(unbatched_unpadded_tensor_dict)
return unbatched_tensor_dict
def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
"""Creates a model function for `Estimator`.
Args:
detection_model_fn: Function that returns a `DetectionModel` instance.
configs: Dictionary of pipeline config objects.
hparams: `HParams` object.
use_tpu: Boolean indicating whether model should be constructed for
use on TPU.
Returns:
`model_fn` for `Estimator`.
"""
train_config = configs['train_config']
eval_input_config = configs['eval_input_config']
def model_fn(features, labels, mode, params=None):
"""Constructs the object detection model.
Args:
features: Dictionary of feature tensors, returned from `input_fn`.
labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
otherwise None.
mode: Mode key from tf.estimator.ModeKeys.
params: Parameter dictionary passed from the estimator.
Returns:
An `EstimatorSpec` that encapsulates the model and its serving
configurations.
"""
params = params or {}
total_loss, train_op, detections, export_outputs = None, None, None, None
is_training = mode == tf.estimator.ModeKeys.TRAIN
detection_model = detection_model_fn(is_training=is_training,
add_summaries=(not use_tpu))
scaffold_fn = None
if mode == tf.estimator.ModeKeys.TRAIN:
labels = unstack_batch(
labels,
unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
elif mode == tf.estimator.ModeKeys.EVAL:
labels = unstack_batch(labels, unpad_groundtruth_tensors=False)
if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
gt_masks_list = None
if fields.InputDataFields.groundtruth_instance_masks in labels:
gt_masks_list = labels[
fields.InputDataFields.groundtruth_instance_masks]
gt_keypoints_list = None
if fields.InputDataFields.groundtruth_keypoints in labels:
gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
detection_model.provide_groundtruth(
groundtruth_boxes_list=gt_boxes_list,
groundtruth_classes_list=gt_classes_list,
groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list)
preprocessed_images = features[fields.InputDataFields.image]
prediction_dict = detection_model.predict(
preprocessed_images, features[fields.InputDataFields.true_image_shape])
detections = detection_model.postprocess(
prediction_dict, features[fields.InputDataFields.true_image_shape])
if mode == tf.estimator.ModeKeys.TRAIN:
if train_config.fine_tune_checkpoint and hparams.load_pretrained:
asg_map = detection_model.restore_map(
from_detection_checkpoint=train_config.from_detection_checkpoint,
load_all_detection_checkpoint_vars=(
train_config.load_all_detection_checkpoint_vars))
available_var_map = (
variables_helper.get_variables_available_in_checkpoint(
asg_map, train_config.fine_tune_checkpoint,
include_global_step=False))
if use_tpu:
def tpu_scaffold():
tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
available_var_map)
return tf.train.Scaffold()
scaffold_fn = tpu_scaffold
else:
tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
available_var_map)
if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
losses_dict = detection_model.loss(
prediction_dict, features[fields.InputDataFields.true_image_shape])
losses = [loss_tensor for loss_tensor in losses_dict.itervalues()]
total_loss = tf.add_n(losses, name='total_loss')
if mode == tf.estimator.ModeKeys.TRAIN:
global_step = tf.train.get_or_create_global_step()
training_optimizer, optimizer_summary_vars = optimizer_builder.build(
train_config.optimizer)
if use_tpu:
training_optimizer = tpu_optimizer.CrossShardOptimizer(
training_optimizer)
# Optionally freeze some layers by setting their gradients to be zero.
trainable_variables = None
if train_config.freeze_variables:
trainable_variables = tf.contrib.framework.filter_variables(
tf.trainable_variables(),
exclude_patterns=train_config.freeze_variables)
clip_gradients_value = None
if train_config.gradient_clipping_by_norm > 0:
clip_gradients_value = train_config.gradient_clipping_by_norm
if not use_tpu:
for var in optimizer_summary_vars:
tf.summary.scalar(var.op.name, var)
summaries = [] if use_tpu else None
train_op = tf.contrib.layers.optimize_loss(
loss=total_loss,
global_step=global_step,
learning_rate=None,
clip_gradients=clip_gradients_value,
optimizer=training_optimizer,
variables=trainable_variables,
summaries=summaries,
name='') # Preventing scope prefix on all variables.
if mode == tf.estimator.ModeKeys.PREDICT:
export_outputs = {
tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
tf.estimator.export.PredictOutput(detections)
}
eval_metric_ops = None
if mode == tf.estimator.ModeKeys.EVAL:
# Detection summaries during eval.
class_agnostic = (fields.DetectionResultFields.detection_classes
not in detections)
groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
eval_dict = eval_util.result_dict_for_single_example(
tf.expand_dims(features[fields.InputDataFields.original_image][0], 0),
features[inputs.HASH_KEY][0],
detections,
groundtruth,
class_agnostic=class_agnostic,
scale_to_absolute=False)
if class_agnostic:
category_index = label_map_util.create_class_agnostic_category_index()
else:
category_index = label_map_util.create_category_index_from_labelmap(
eval_input_config.label_map_path)
detection_and_groundtruth = vis_utils.draw_side_by_side_evaluation_image(
eval_dict, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)
if not use_tpu:
tf.summary.image('Detections_Left_Groundtruth_Right',
detection_and_groundtruth)
# Eval metrics on a single image.
detection_fields = fields.DetectionResultFields()
input_data_fields = fields.InputDataFields()
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
category_index.values())
eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
image_id=eval_dict[input_data_fields.key],
groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes],
groundtruth_classes=eval_dict[input_data_fields.groundtruth_classes],
detection_boxes=eval_dict[detection_fields.detection_boxes],
detection_scores=eval_dict[detection_fields.detection_scores],
detection_classes=eval_dict[detection_fields.detection_classes])
if use_tpu:
return tf.contrib.tpu.TPUEstimatorSpec(
mode=mode,
scaffold_fn=scaffold_fn,
predictions=detections,
loss=total_loss,
train_op=train_op,
eval_metrics=eval_metric_ops,
export_outputs=export_outputs)
else:
return tf.estimator.EstimatorSpec(
mode=mode,
predictions=detections,
loss=total_loss,
train_op=train_op,
eval_metric_ops=eval_metric_ops,
export_outputs=export_outputs)
return model_fn
def _build_experiment_fn(train_steps, eval_steps):
"""Returns a function that creates an `Experiment`."""
def build_experiment(run_config, hparams):
"""Builds an `Experiment` from configuration and hyperparameters.
Args:
run_config: A `RunConfig`.
hparams: A `HParams`.
Returns:
An `Experiment` object.
"""
return populate_experiment(run_config, hparams, FLAGS.pipeline_config_path,
train_steps, eval_steps)
return build_experiment
def populate_experiment(run_config,
hparams,
pipeline_config_path,
train_steps=None,
eval_steps=None,
model_fn_creator=create_model_fn,
**kwargs):
"""Populates an `Experiment` object.
Args:
run_config: A `RunConfig`.
hparams: A `HParams`.
pipeline_config_path: A path to a pipeline config file.
train_steps: Number of training steps. If None, the number of training steps
is set from the `TrainConfig` proto.
eval_steps: Number of evaluation steps per evaluation cycle. If None, the
number of evaluation steps is set from the `EvalConfig` proto.
model_fn_creator: A function that creates a `model_fn` for `Estimator`.
Follows the signature:
* Args:
* `detection_model_fn`: Function that returns `DetectionModel` instance.
* `configs`: Dictionary of pipeline config objects.
* `hparams`: `HParams` object.
* Returns:
`model_fn` for `Estimator`.
**kwargs: Additional keyword arguments for configuration override.
Returns:
An `Experiment` that defines all aspects of training, evaluation, and
export.
"""
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(
configs,
hparams,
train_steps=train_steps,
eval_steps=eval_steps,
**kwargs)
model_config = configs['model']
train_config = configs['train_config']
train_input_config = configs['train_input_config']
eval_config = configs['eval_config']
eval_input_config = configs['eval_input_config']
if train_steps is None:
train_steps = train_config.num_steps if train_config.num_steps else None
if eval_steps is None:
eval_steps = eval_config.num_examples if eval_config.num_examples else None
detection_model_fn = functools.partial(
model_builder.build, model_config=model_config)
# Create the input functions for TRAIN/EVAL.
train_input_fn = inputs.create_train_input_fn(
train_config=train_config,
train_input_config=train_input_config,
model_config=model_config)
eval_input_fn = inputs.create_eval_input_fn(
eval_config=eval_config,
eval_input_config=eval_input_config,
model_config=model_config)
export_strategies = [
tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
serving_input_fn=inputs.create_predict_input_fn(
model_config=model_config))
]
estimator = tf.estimator.Estimator(
model_fn=model_fn_creator(detection_model_fn, configs, hparams),
config=run_config)
if run_config.is_chief:
# Store the final pipeline config for traceability.
pipeline_config_final = config_util.create_pipeline_proto_from_configs(
configs)
pipeline_config_final_path = os.path.join(estimator.model_dir,
'pipeline.config')
config_text = text_format.MessageToString(pipeline_config_final)
with tf.gfile.Open(pipeline_config_final_path, 'wb') as f:
tf.logging.info('Writing as-run pipeline config file to %s',
pipeline_config_final_path)
f.write(config_text)
return tf.contrib.learn.Experiment(
estimator=estimator,
train_input_fn=train_input_fn,
eval_input_fn=eval_input_fn,
train_steps=train_steps,
eval_steps=eval_steps,
export_strategies=export_strategies,
eval_delay_secs=120,)
def main(unused_argv):
tf.flags.mark_flag_as_required('model_dir')
tf.flags.mark_flag_as_required('pipeline_config_path')
config = tf.contrib.learn.RunConfig(model_dir=FLAGS.model_dir)
learn_runner.run(
experiment_fn=_build_experiment_fn(FLAGS.num_train_steps,
FLAGS.num_eval_steps),
run_config=config,
hparams=model_hparams.create_hparams())
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Hyperparameters for the object detection model in TF.learn.
This file consolidates and documents the hyperparameters used by the model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def create_hparams(hparams_overrides=None):
"""Returns hyperparameters, including any flag value overrides.
Args:
hparams_overrides: Optional hparams overrides, represented as a
string containing comma-separated hparam_name=value pairs.
Returns:
The hyperparameters as a tf.HParams object.
"""
hparams = tf.contrib.training.HParams(
# Whether a fine tuning checkpoint (provided in the pipeline config)
# should be loaded for training.
load_pretrained=True)
# Override any of the preceding hyperparameter values.
if hparams_overrides:
hparams = hparams.parse(hparams_overrides)
return hparams
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object detection model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
import numpy as np
import tensorflow as tf
from object_detection import inputs
from object_detection import model
from object_detection import model_hparams
from object_detection import model_test_util
from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
FLAGS = tf.flags.FLAGS
MODEL_NAME_FOR_TEST = model_test_util.SSD_INCEPTION_MODEL_NAME
def _get_data_path():
"""Returns an absolute path to TFRecord file."""
return os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE, 'test_data',
'pets_examples.record')
def _get_labelmap_path():
"""Returns an absolute path to label map file."""
return os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE, 'data',
'pet_label_map.pbtxt')
def _get_configs_for_model(model_name):
"""Returns configurations for model."""
filename = model_test_util.GetPipelineConfigPath(model_name)
data_path = _get_data_path()
label_map_path = _get_labelmap_path()
configs = config_util.get_configs_from_pipeline_file(filename)
configs = config_util.merge_external_params_with_configs(
configs,
train_input_path=data_path,
eval_input_path=data_path,
label_map_path=label_map_path)
return configs
def setUpModule():
model_test_util.InitializeFlags(MODEL_NAME_FOR_TEST)
class ModelTflearnTest(tf.test.TestCase):
@classmethod
def setUpClass(cls):
tf.reset_default_graph()
def _assert_outputs_for_train_eval(self, configs, mode, class_agnostic=False):
model_config = configs['model']
train_config = configs['train_config']
with tf.Graph().as_default():
if mode == tf.estimator.ModeKeys.TRAIN:
features, labels = inputs.create_train_input_fn(
configs['train_config'],
configs['train_input_config'],
configs['model'])()
batch_size = train_config.batch_size
else:
features, labels = inputs.create_eval_input_fn(
configs['eval_config'],
configs['eval_input_config'],
configs['model'])()
batch_size = 1
detection_model_fn = functools.partial(
model_builder.build, model_config=model_config, is_training=True)
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
model_fn = model.create_model_fn(detection_model_fn, configs, hparams)
estimator_spec = model_fn(features, labels, mode)
self.assertIsNotNone(estimator_spec.loss)
self.assertIsNotNone(estimator_spec.predictions)
if class_agnostic:
self.assertNotIn('detection_classes', estimator_spec.predictions)
else:
detection_classes = estimator_spec.predictions['detection_classes']
self.assertEqual(batch_size, detection_classes.shape.as_list()[0])
self.assertEqual(tf.float32, detection_classes.dtype)
detection_boxes = estimator_spec.predictions['detection_boxes']
detection_scores = estimator_spec.predictions['detection_scores']
num_detections = estimator_spec.predictions['num_detections']
self.assertEqual(batch_size, detection_boxes.shape.as_list()[0])
self.assertEqual(tf.float32, detection_boxes.dtype)
self.assertEqual(batch_size, detection_scores.shape.as_list()[0])
self.assertEqual(tf.float32, detection_scores.dtype)
self.assertEqual(tf.float32, num_detections.dtype)
if mode == tf.estimator.ModeKeys.TRAIN:
self.assertIsNotNone(estimator_spec.train_op)
return estimator_spec
def _assert_outputs_for_predict(self, configs):
model_config = configs['model']
with tf.Graph().as_default():
features, _ = inputs.create_eval_input_fn(
configs['eval_config'],
configs['eval_input_config'],
configs['model'])()
detection_model_fn = functools.partial(
model_builder.build, model_config=model_config, is_training=False)
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
model_fn = model.create_model_fn(detection_model_fn, configs, hparams)
estimator_spec = model_fn(features, None, tf.estimator.ModeKeys.PREDICT)
self.assertIsNone(estimator_spec.loss)
self.assertIsNone(estimator_spec.train_op)
self.assertIsNotNone(estimator_spec.predictions)
self.assertIsNotNone(estimator_spec.export_outputs)
self.assertIn(tf.saved_model.signature_constants.PREDICT_METHOD_NAME,
estimator_spec.export_outputs)
def testModelFnInTrainMode(self):
"""Tests the model function in TRAIN mode."""
configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.TRAIN)
def testModelFnInEvalMode(self):
"""Tests the model function in EVAL mode."""
configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.EVAL)
def testModelFnInPredictMode(self):
"""Tests the model function in PREDICT mode."""
configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
self._assert_outputs_for_predict(configs)
def testExperiment(self):
"""Tests that the `Experiment` object is constructed correctly."""
experiment = model_test_util.BuildExperiment()
model_dir = experiment.estimator.model_dir
pipeline_config_path = os.path.join(model_dir, 'pipeline.config')
self.assertTrue(tf.gfile.Exists(pipeline_config_path))
class UnbatchTensorsTest(tf.test.TestCase):
def test_unbatch_without_unpadding(self):
image_placeholder = tf.placeholder(tf.float32, [2, None, None, None])
groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, None, None])
groundtruth_classes_placeholder = tf.placeholder(tf.float32,
[2, None, None])
groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, None])
tensor_dict = {
fields.InputDataFields.image:
image_placeholder,
fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes_placeholder,
fields.InputDataFields.groundtruth_classes:
groundtruth_classes_placeholder,
fields.InputDataFields.groundtruth_weights:
groundtruth_weights_placeholder
}
unbatched_tensor_dict = model.unstack_batch(
tensor_dict, unpad_groundtruth_tensors=False)
with self.test_session() as sess:
unbatched_tensor_dict_out = sess.run(
unbatched_tensor_dict,
feed_dict={
image_placeholder:
np.random.rand(2, 4, 4, 3).astype(np.float32),
groundtruth_boxes_placeholder:
np.random.rand(2, 5, 4).astype(np.float32),
groundtruth_classes_placeholder:
np.random.rand(2, 5, 6).astype(np.float32),
groundtruth_weights_placeholder:
np.random.rand(2, 5).astype(np.float32)
})
for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]:
self.assertAllEqual(image_out.shape, [4, 4, 3])
for groundtruth_boxes_out in unbatched_tensor_dict_out[
fields.InputDataFields.groundtruth_boxes]:
self.assertAllEqual(groundtruth_boxes_out.shape, [5, 4])
for groundtruth_classes_out in unbatched_tensor_dict_out[
fields.InputDataFields.groundtruth_classes]:
self.assertAllEqual(groundtruth_classes_out.shape, [5, 6])
for groundtruth_weights_out in unbatched_tensor_dict_out[
fields.InputDataFields.groundtruth_weights]:
self.assertAllEqual(groundtruth_weights_out.shape, [5])
def test_unbatch_and_unpad_groundtruth_tensors(self):
image_placeholder = tf.placeholder(tf.float32, [2, None, None, None])
groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, 5, None])
groundtruth_classes_placeholder = tf.placeholder(tf.float32, [2, 5, None])
groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, 5])
num_groundtruth_placeholder = tf.placeholder(tf.int32, [2])
tensor_dict = {
fields.InputDataFields.image:
image_placeholder,
fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes_placeholder,
fields.InputDataFields.groundtruth_classes:
groundtruth_classes_placeholder,
fields.InputDataFields.groundtruth_weights:
groundtruth_weights_placeholder,
fields.InputDataFields.num_groundtruth_boxes:
num_groundtruth_placeholder
}
unbatched_tensor_dict = model.unstack_batch(
tensor_dict, unpad_groundtruth_tensors=True)
with self.test_session() as sess:
unbatched_tensor_dict_out = sess.run(
unbatched_tensor_dict,
feed_dict={
image_placeholder:
np.random.rand(2, 4, 4, 3).astype(np.float32),
groundtruth_boxes_placeholder:
np.random.rand(2, 5, 4).astype(np.float32),
groundtruth_classes_placeholder:
np.random.rand(2, 5, 6).astype(np.float32),
groundtruth_weights_placeholder:
np.random.rand(2, 5).astype(np.float32),
num_groundtruth_placeholder:
np.array([3, 3], np.int32)
})
for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]:
self.assertAllEqual(image_out.shape, [4, 4, 3])
for groundtruth_boxes_out in unbatched_tensor_dict_out[
fields.InputDataFields.groundtruth_boxes]:
self.assertAllEqual(groundtruth_boxes_out.shape, [3, 4])
for groundtruth_classes_out in unbatched_tensor_dict_out[
fields.InputDataFields.groundtruth_classes]:
self.assertAllEqual(groundtruth_classes_out.shape, [3, 6])
for groundtruth_weights_out in unbatched_tensor_dict_out[
fields.InputDataFields.groundtruth_weights]:
self.assertAllEqual(groundtruth_weights_out.shape, [3])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utils for tests for object detection tflearn model."""
from __future__ import absolute_import
import os
import tempfile
import tensorflow as tf
from object_detection import model
from object_detection import model_hparams
FLAGS = tf.flags.FLAGS
FASTER_RCNN_MODEL_NAME = 'faster_rcnn_resnet50_pets'
SSD_INCEPTION_MODEL_NAME = 'ssd_inception_v2_pets'
PATH_BASE = 'google3/third_party/tensorflow_models/object_detection/'
def GetPipelineConfigPath(model_name):
"""Returns path to the local pipeline config file."""
return os.path.join(FLAGS.test_srcdir, PATH_BASE, 'samples', 'configs',
model_name + '.config')
def InitializeFlags(model_name_for_test):
FLAGS.model_dir = tempfile.mkdtemp()
FLAGS.pipeline_config_path = GetPipelineConfigPath(model_name_for_test)
def BuildExperiment():
"""Builds an Experiment object for testing purposes."""
run_config = tf.contrib.learn.RunConfig()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
# pylint: disable=protected-access
experiment_fn = model._build_experiment_fn(10, 10)
# pylint: enable=protected-access
return experiment_fn(run_config, hparams)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs `Estimator` for object detection model on TPUs.
This uses the TPUEstimator API to define and run a model in TRAIN/EVAL modes.
"""
# pylint: enable=line-too-long
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu import tpu_config
from tensorflow.contrib.tpu.python.tpu import tpu_estimator
from tensorflow.contrib.training.python.training import evaluation
from object_detection import inputs
from object_detection import model
from object_detection import model_hparams
from object_detection.builders import model_builder
from object_detection.utils import config_util
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')
# Cloud TPU Cluster Resolvers
tf.flags.DEFINE_string(
'gcp_project',
default=None,
help='Project name for the Cloud TPU-enabled project. If not specified, we '
'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
'tpu_zone',
default=None,
help='GCE zone where the Cloud TPU is located in. If not specified, we '
'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
'tpu_name',
default=None,
help='Name of the Cloud TPU for Cluster Resolvers. You must specify either '
'this flag or --master.')
tf.flags.DEFINE_string(
'master', default=None,
help='GRPC URL of the master (e.g. grpc://ip.address.of.tpu:8470). You '
'must specify either this flag or --tpu_name.')
tf.flags.DEFINE_integer('num_shards', 8, 'Number of shards (TPU cores).')
tf.flags.DEFINE_integer('iterations_per_loop', 100,
'Number of iterations per TPU training loop.')
# For mode=train_and_eval, evaluation occurs after training is finished.
# Note: independently of steps_per_checkpoint, estimator will save the most
# recent checkpoint every 10 minutes by default for train_and_eval
tf.flags.DEFINE_string('mode', 'train_and_eval',
'Mode to run: train, eval, train_and_eval')
tf.flags.DEFINE_integer('train_batch_size', 32 * 8, 'Batch size for training.')
# For EVAL.
tf.flags.DEFINE_integer('min_eval_interval_secs', 180,
'Minimum seconds between evaluations.')
tf.flags.DEFINE_integer(
'eval_timeout_secs', None,
'Maximum seconds between checkpoints before evaluation terminates.')
FLAGS = tf.flags.FLAGS
def create_estimator(run_config,
hparams,
pipeline_config_path,
train_steps=None,
eval_steps=None,
train_batch_size=None,
model_fn_creator=model.create_model_fn,
use_tpu=False,
num_shards=1,
params=None,
**kwargs):
"""Creates an `Estimator` object.
Args:
run_config: A `RunConfig`.
hparams: A `HParams`.
pipeline_config_path: A path to a pipeline config file.
train_steps: Number of training steps. If None, the number of training steps
is set from the `TrainConfig` proto.
eval_steps: Number of evaluation steps per evaluation cycle. If None, the
number of evaluation steps is set from the `EvalConfig` proto.
train_batch_size: Training batch size. If none, use batch size from
`TrainConfig` proto.
model_fn_creator: A function that creates a `model_fn` for `Estimator`.
Follows the signature:
* Args:
* `detection_model_fn`: Function that returns `DetectionModel` instance.
* `configs`: Dictionary of pipeline config objects.
* `hparams`: `HParams` object.
* Returns:
`model_fn` for `Estimator`.
use_tpu: Boolean, whether training and evaluation should run on TPU.
num_shards: Number of shards (TPU cores).
params: Parameter dictionary passed from the estimator.
**kwargs: Additional keyword arguments for configuration override.
Returns:
Estimator: A estimator object used for training and evaluation
train_input_fn: Input function for the training loop
eval_input_fn: Input function for the evaluation run
train_steps: Number of training steps either from arg `train_steps` or
`TrainConfig` proto
eval_steps: Number of evaluation steps either from arg `eval_steps` or
`EvalConfig` proto
"""
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(
configs,
hparams,
train_steps=train_steps,
eval_steps=eval_steps,
batch_size=train_batch_size,
**kwargs)
model_config = configs['model']
train_config = configs['train_config']
train_input_config = configs['train_input_config']
eval_config = configs['eval_config']
eval_input_config = configs['eval_input_config']
if params is None:
params = {}
if train_steps is None:
train_steps = train_config.num_steps if train_config.num_steps else None
if eval_steps is None:
eval_steps = eval_config.num_examples if eval_config.num_examples else None
detection_model_fn = functools.partial(
model_builder.build, model_config=model_config)
# Create the input functions for TRAIN/EVAL.
train_input_fn = inputs.create_train_input_fn(
train_config=train_config,
train_input_config=train_input_config,
model_config=model_config)
eval_input_fn = inputs.create_eval_input_fn(
eval_config=eval_config,
eval_input_config=eval_input_config,
model_config=model_config)
estimator = tpu_estimator.TPUEstimator(
model_fn=model_fn_creator(detection_model_fn, configs, hparams,
use_tpu),
train_batch_size=train_config.batch_size,
# For each core, only batch size 1 is supported for eval.
eval_batch_size=num_shards * 1 if use_tpu else 1,
use_tpu=use_tpu,
config=run_config,
params=params)
return estimator, train_input_fn, eval_input_fn, train_steps, eval_steps
def main(unused_argv):
tf.flags.mark_flag_as_required('model_dir')
tf.flags.mark_flag_as_required('pipeline_config_path')
if FLAGS.master is None and FLAGS.tpu_name is None:
raise RuntimeError('You must specify either --master or --tpu_name.')
if FLAGS.master is not None:
if FLAGS.tpu_name is not None:
tf.logging.warn('Both --master and --tpu_name are set. Ignoring '
'--tpu_name and using --master.')
tpu_grpc_url = FLAGS.master
else:
tpu_cluster_resolver = (
tf.contrib.cluster_resolver.python.training.TPUClusterResolver(
tpu_names=[FLAGS.tpu_name],
zone=FLAGS.tpu_zone,
project=FLAGS.gcp_project))
tpu_grpc_url = tpu_cluster_resolver.get_master()
config = tpu_config.RunConfig(
master=tpu_grpc_url,
evaluation_master=tpu_grpc_url,
model_dir=FLAGS.model_dir,
tpu_config=tpu_config.TPUConfig(
iterations_per_loop=FLAGS.iterations_per_loop,
num_shards=FLAGS.num_shards))
params = {}
estimator, train_input_fn, eval_input_fn, train_steps, eval_steps = (
create_estimator(
config,
model_hparams.create_hparams(),
FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps,
eval_steps=FLAGS.num_eval_steps,
train_batch_size=FLAGS.train_batch_size,
use_tpu=FLAGS.use_tpu,
num_shards=FLAGS.num_shards,
params=params))
if FLAGS.mode in ['train', 'train_and_eval']:
estimator.train(input_fn=train_input_fn, max_steps=train_steps)
if FLAGS.mode == 'train_and_eval':
# Eval one time.
eval_results = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
tf.logging.info('Eval results: %s' % eval_results)
# Continuously evaluating.
if FLAGS.mode == 'eval':
def terminate_eval():
tf.logging.info('Terminating eval after %d seconds of no checkpoints' %
FLAGS.eval_timeout_secs)
return True
# Run evaluation when there's a new checkpoint.
for ckpt in evaluation.checkpoints_iterator(
FLAGS.model_dir,
min_interval_secs=FLAGS.min_eval_interval_secs,
timeout=FLAGS.eval_timeout_secs,
timeout_fn=terminate_eval):
tf.logging.info('Starting to evaluate.')
try:
eval_results = estimator.evaluate(
input_fn=eval_input_fn,
steps=eval_steps,
checkpoint_path=ckpt)
tf.logging.info('Eval results: %s' % eval_results)
# Terminate eval job when final checkpoint is reached
current_step = int(os.path.basename(ckpt).split('-')[1])
if current_step >= train_steps:
tf.logging.info(
'Evaluation finished after training step %d' % current_step)
break
except tf.errors.NotFoundError:
tf.logging.info(
'Checkpoint %s no longer exists, skipping checkpoint' % ckpt)
if __name__ == '__main__':
tf.app.run()
......@@ -15,6 +15,7 @@ py_library(
],
deps = [
"//tensorflow",
"//tensorflow/models/research/object_detection/utils:ops",
],
)
......@@ -36,6 +37,7 @@ py_library(
],
deps = [
"//tensorflow",
"//tensorflow/models/research/object_detection/utils:test_case",
],
)
......@@ -47,9 +49,10 @@ py_library(
deps = [
":feature_map_generators",
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/slim:inception_v2",
"//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:inception_v2",
],
)
......@@ -61,9 +64,10 @@ py_library(
deps = [
":feature_map_generators",
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/slim:inception_v3",
"//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:inception_v3",
],
)
......@@ -73,9 +77,10 @@ py_library(
deps = [
":feature_map_generators",
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/slim:mobilenet_v1",
"//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:mobilenet_v1",
],
)
......@@ -86,8 +91,40 @@ py_library(
":feature_map_generators",
":ssd_mobilenet_v1_feature_extractor",
"//tensorflow",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/slim:mobilenet_v1",
"//tensorflow/models/research/object_detection/utils:ops",
"//third_party/tensorflow_models/slim:mobilenet_v1",
],
)
py_library(
name = "ssd_resnet_v1_fpn_feature_extractor",
srcs = ["ssd_resnet_v1_fpn_feature_extractor.py"],
deps = [
":feature_map_generators",
"//tensorflow",
"//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:resnet_v1",
],
)
py_library(
name = "ssd_resnet_v1_fpn_feature_extractor_testbase",
srcs = ["ssd_resnet_v1_fpn_feature_extractor_testbase.py"],
deps = [
"//tensorflow/models/research/object_detection/models:ssd_feature_extractor_test",
],
)
py_test(
name = "ssd_resnet_v1_fpn_feature_extractor_test",
timeout = "long",
srcs = ["ssd_resnet_v1_fpn_feature_extractor_test.py"],
deps = [
":ssd_resnet_v1_fpn_feature_extractor",
":ssd_resnet_v1_fpn_feature_extractor_testbase",
"//tensorflow",
],
)
......@@ -153,8 +190,8 @@ py_library(
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:nasnet",
"//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//third_party/tensorflow_models/slim:nasnet",
],
)
......@@ -165,8 +202,8 @@ py_library(
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:inception_resnet_v2",
"//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//third_party/tensorflow_models/slim:inception_resnet_v2",
],
)
......@@ -188,8 +225,8 @@ py_library(
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:inception_v2",
"//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//third_party/tensorflow_models/slim:inception_v2",
],
)
......@@ -211,9 +248,9 @@ py_library(
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:resnet_utils",
"//tensorflow_models/slim:resnet_v1",
"//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//third_party/tensorflow_models/slim:resnet_utils",
"//third_party/tensorflow_models/slim:resnet_v1",
],
)
......
......@@ -51,7 +51,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None):
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args:
......@@ -66,6 +68,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
Raises:
ValueError: upon invalid `pad_to_multiple` values.
......@@ -76,7 +81,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights)
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
......@@ -88,13 +94,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
Raises:
ValueError: if image height or width are not 256 pixels.
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(
tf.equal(tf.shape(preprocessed_inputs)[1], 256),
tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
['image size must be 256 in both height and width.'])
image_shape = preprocessed_inputs.get_shape()
image_shape.assert_has_rank(4)
image_height = image_shape[1].value
image_width = image_shape[2].value
if image_height is None or image_width is None:
shape_assert = tf.Assert(
tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
['image size must be 256 in both height and width.'])
with tf.control_dependencies([shape_assert]):
preprocessed_inputs = tf.identity(preprocessed_inputs)
elif image_height != 256 or image_width != 256:
raise ValueError('image size must be = 256 in both height and width;'
' image dim = %d,%d' % (image_height, image_width))
feature_map_layout = {
'from_layer': [
......@@ -102,10 +120,12 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
],
'layer_depth': [-1, -1, 512, 256, 256],
'conv_kernel_size': [-1, -1, 3, 3, 2],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
}
with tf.control_dependencies([shape_assert]):
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope([slim.batch_norm], fused=False):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base(
......
......@@ -22,7 +22,7 @@ from object_detection.models import ssd_feature_extractor_test
class EmbeddedSSDMobileNetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True):
......@@ -51,11 +51,23 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024),
(4, 4, 4, 512), (4, 2, 2, 256),
(4, 1, 1, 256)]
expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024),
(2, 4, 4, 512), (2, 2, 2, 256),
(2, 1, 1, 256)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024),
(2, 4, 4, 512), (2, 2, 2, 256),
(2, 1, 1, 256)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
......@@ -63,10 +75,10 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width = 256
depth_multiplier = 0.5**12
pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 32), (4, 8, 8, 32), (4, 4, 4, 32),
(4, 2, 2, 32), (4, 1, 1, 32)]
expected_feature_map_shape = [(2, 16, 16, 32), (2, 8, 8, 32), (2, 4, 4, 32),
(2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1(
......@@ -75,11 +87,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024),
(4, 4, 4, 512), (4, 2, 2, 256),
(4, 1, 1, 256)]
expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024),
(2, 4, 4, 512), (2, 2, 2, 256),
(2, 1, 1, 256)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_raises_error_with_pad_to_multiple_not_1(self):
......
......@@ -180,7 +180,7 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
InceptionResnetV2 checkpoints.
TODO: revisit whether it's possible to force the
TODO(jonathanhuang,rathodv): revisit whether it's possible to force the
`Repeat` namescope as created in `_extract_box_classifier_features` to
start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can
be used.
......
......@@ -111,7 +111,8 @@ class FasterRCNNResnetV1FeatureExtractor(
with tf.control_dependencies([shape_assert]):
# Disables batchnorm for fine-tuning with smaller batch sizes.
# TODO: Figure out if it is needed when image batch size is bigger.
# TODO: Figure out if it is needed when image
# batch size is bigger.
with slim.arg_scope(
resnet_utils.resnet_arg_scope(
batch_norm_epsilon=1e-5,
......
......@@ -25,6 +25,7 @@ of final feature maps.
"""
import collections
import tensorflow as tf
from object_detection.utils import ops
slim = tf.contrib.slim
......@@ -115,6 +116,9 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
feature_map_keys = []
feature_maps = []
base_from_layer = ''
use_explicit_padding = False
if 'use_explicit_padding' in feature_map_layout:
use_explicit_padding = feature_map_layout['use_explicit_padding']
use_depthwise = False
if 'use_depthwise' in feature_map_layout:
use_depthwise = feature_map_layout['use_depthwise']
......@@ -139,16 +143,21 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
padding='SAME',
stride=1,
scope=layer_name)
stride = 2
layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
base_from_layer, index, conv_kernel_size, conv_kernel_size,
depth_fn(layer_depth))
stride = 2
padding = 'SAME'
if use_explicit_padding:
padding = 'VALID'
intermediate_layer = ops.fixed_padding(
intermediate_layer, conv_kernel_size)
if use_depthwise:
feature_map = slim.separable_conv2d(
intermediate_layer,
None, [conv_kernel_size, conv_kernel_size],
depth_multiplier=1,
padding='SAME',
padding=padding,
stride=stride,
scope=layer_name + '_depthwise')
feature_map = slim.conv2d(
......@@ -161,10 +170,56 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
feature_map = slim.conv2d(
intermediate_layer,
depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size],
padding='SAME',
padding=padding,
stride=stride,
scope=layer_name)
feature_map_keys.append(layer_name)
feature_maps.append(feature_map)
return collections.OrderedDict(
[(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
def fpn_top_down_feature_maps(image_features, depth, scope=None):
"""Generates `top-down` feature maps for Feature Pyramid Networks.
See https://arxiv.org/abs/1612.03144 for details.
Args:
image_features: list of image feature tensors. Spatial resolutions of
succesive tensors must reduce exactly by a factor of 2.
depth: depth of output feature maps.
scope: A scope name to wrap this op under.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
"""
with tf.variable_scope(
scope, 'top_down', image_features):
num_levels = len(image_features)
output_feature_maps_list = []
output_feature_map_keys = []
with slim.arg_scope(
[slim.conv2d],
activation_fn=None, normalizer_fn=None, padding='SAME', stride=1):
top_down = slim.conv2d(
image_features[-1],
depth, [1, 1], scope='projection_%d' % num_levels)
output_feature_maps_list.append(top_down)
output_feature_map_keys.append(
'top_down_feature_map_%d' % (num_levels - 1))
for level in reversed(range(num_levels - 1)):
top_down = ops.nearest_neighbor_upsampling(top_down, 2)
residual = slim.conv2d(
image_features[level], depth, [1, 1],
scope='projection_%d' % (level + 1))
top_down = 0.5 * top_down + 0.5 * residual
output_feature_maps_list.append(slim.conv2d(
top_down,
depth, [3, 3],
activation_fn=None,
scope='smoothing_%d' % (level + 1)))
output_feature_map_keys.append('top_down_feature_map_%d' % level)
return collections.OrderedDict(
reversed(zip(output_feature_map_keys, output_feature_maps_list)))
......@@ -40,7 +40,7 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
}
# TODO(rathodv): add tests with different anchor strides.
# TODO: add tests with different anchor strides.
class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes_with_inception_v2(self):
......@@ -134,6 +134,34 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
class FPNFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes(self):
image_features = [
tf.random_uniform([4, 8, 8, 256], dtype=tf.float32),
tf.random_uniform([4, 4, 4, 256], dtype=tf.float32),
tf.random_uniform([4, 2, 2, 256], dtype=tf.float32),
tf.random_uniform([4, 1, 1, 256], dtype=tf.float32),
]
feature_maps = feature_map_generators.fpn_top_down_feature_maps(
image_features=image_features, depth=128)
expected_feature_map_shapes = {
'top_down_feature_map_0': (4, 8, 8, 128),
'top_down_feature_map_1': (4, 4, 4, 128),
'top_down_feature_map_2': (4, 2, 2, 128),
'top_down_feature_map_3': (4, 1, 1, 128)
}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = {key: value.shape
for key, value in out_feature_maps.items()}
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
class GetDepthFunctionTest(tf.test.TestCase):
def test_return_min_depth_when_multiplier_is_small(self):
......
......@@ -17,33 +17,14 @@
from abc import abstractmethod
import itertools
import numpy as np
import tensorflow as tf
from object_detection.utils import test_case
class SsdFeatureExtractorTestBase(object):
def _validate_features_shape(self,
feature_extractor,
preprocessed_inputs,
expected_feature_map_shapes):
"""Checks the extracted features are of correct shape.
Args:
feature_extractor: The feature extractor to test.
preprocessed_inputs: A [batch, height, width, 3] tensor to extract
features with.
expected_feature_map_shapes: The expected shape of the extracted features.
"""
feature_maps = feature_extractor.extract_features(preprocessed_inputs)
feature_map_shapes = [tf.shape(feature_map) for feature_map in feature_maps]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
feature_map_shapes_out = sess.run(feature_map_shapes)
for shape_out, exp_shape_out in zip(
feature_map_shapes_out, expected_feature_map_shapes):
self.assertAllEqual(shape_out, exp_shape_out)
class SsdFeatureExtractorTestBase(test_case.TestCase):
@abstractmethod
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple):
......@@ -59,14 +40,39 @@ class SsdFeatureExtractorTestBase(object):
pass
def check_extract_features_returns_correct_shape(
self, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shapes_out):
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
preprocessed_inputs = tf.random_uniform(
[4, image_height, image_width, 3], dtype=tf.float32)
self._validate_features_shape(
feature_extractor, preprocessed_inputs, expected_feature_map_shapes_out)
self, batch_size, image_height, image_width, depth_multiplier,
pad_to_multiple, expected_feature_map_shapes):
def graph_fn(image_tensor):
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
feature_maps = feature_extractor.extract_features(image_tensor)
return feature_maps
image_tensor = np.random.rand(batch_size, image_height, image_width,
3).astype(np.float32)
feature_maps = self.execute(graph_fn, [image_tensor])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def check_extract_features_returns_correct_shapes_with_dynamic_inputs(
self, batch_size, image_height, image_width, depth_multiplier,
pad_to_multiple, expected_feature_map_shapes):
def graph_fn(image_height, image_width):
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
image_tensor = tf.random_uniform([batch_size, image_height, image_width,
3], dtype=tf.float32)
feature_maps = feature_extractor.extract_features(image_tensor)
return feature_maps
feature_maps = self.execute_cpu(graph_fn, [
np.array(image_height, dtype=np.int32),
np.array(image_width, dtype=np.int32)
])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def check_extract_features_raises_error_with_invalid_image_size(
self, image_height, image_width, depth_multiplier, pad_to_multiple):
......
......@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import inception_v2
slim = tf.contrib.slim
......@@ -34,7 +35,9 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None):
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""InceptionV2 Feature Extractor for SSD Models.
Args:
......@@ -49,10 +52,14 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
"""
super(SSDInceptionV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights)
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -80,32 +87,30 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.'])
preprocessed_inputs = shape_utils.check_min_image_dim(
33, preprocessed_inputs)
feature_map_layout = {
'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
}
with tf.control_dependencies([shape_assert]):
with slim.arg_scope(self._conv_hyperparams):
with tf.variable_scope('InceptionV2',
reuse=self._reuse_weights) as scope:
_, image_features = inception_v2.inception_v2_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Mixed_5c',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
with slim.arg_scope(self._conv_hyperparams):
with tf.variable_scope('InceptionV2',
reuse=self._reuse_weights) as scope:
_, image_features = inception_v2.inception_v2_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Mixed_5c',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
......@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v2_feature_extractor
class SsdInceptionV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True):
......@@ -49,11 +49,23 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(4, 8, 8, 576), (4, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)]
expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1024),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1024),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self):
......@@ -61,11 +73,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 299
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 576), (4, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)]
expected_feature_map_shape = [(2, 19, 19, 576), (2, 10, 10, 1024),
(2, 5, 5, 512), (2, 3, 3, 256),
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
......@@ -73,11 +85,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 299
depth_multiplier = 0.5**12
pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 128), (4, 10, 10, 128),
(4, 5, 5, 32), (4, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)]
expected_feature_map_shape = [(2, 19, 19, 128), (2, 10, 10, 128),
(2, 5, 5, 32), (2, 3, 3, 32),
(2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
......@@ -85,11 +97,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 299
depth_multiplier = 1.0
pad_to_multiple = 32
expected_feature_map_shape = [(4, 20, 20, 576), (4, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)]
expected_feature_map_shape = [(2, 20, 20, 576), (2, 10, 10, 1024),
(2, 5, 5, 512), (2, 3, 3, 256),
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self):
......
......@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import inception_v3
slim = tf.contrib.slim
......@@ -34,7 +35,9 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None):
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""InceptionV3 Feature Extractor for SSD Models.
Args:
......@@ -49,10 +52,14 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
"""
super(SSDInceptionV3FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights)
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -80,32 +87,29 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.'])
preprocessed_inputs = shape_utils.check_min_image_dim(
33, preprocessed_inputs)
feature_map_layout = {
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
}
with tf.control_dependencies([shape_assert]):
with slim.arg_scope(self._conv_hyperparams):
with tf.variable_scope('InceptionV3',
reuse=self._reuse_weights) as scope:
_, image_features = inception_v3.inception_v3_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Mixed_7c',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
with slim.arg_scope(self._conv_hyperparams):
with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope:
_, image_features = inception_v3.inception_v3_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Mixed_7c',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
......@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v3_feature_extractor
class SsdInceptionV3FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True):
......@@ -49,11 +49,23 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(4, 13, 13, 288), (4, 6, 6, 768),
(4, 2, 2, 2048), (4, 1, 1, 512),
(4, 1, 1, 256), (4, 1, 1, 128)]
expected_feature_map_shape = [(2, 13, 13, 288), (2, 6, 6, 768),
(2, 2, 2, 2048), (2, 1, 1, 512),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 13, 13, 288), (2, 6, 6, 768),
(2, 2, 2, 2048), (2, 1, 1, 512),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self):
......@@ -61,11 +73,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 299
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(4, 35, 35, 288), (4, 17, 17, 768),
(4, 8, 8, 2048), (4, 4, 4, 512),
(4, 2, 2, 256), (4, 1, 1, 128)]
expected_feature_map_shape = [(2, 35, 35, 288), (2, 17, 17, 768),
(2, 8, 8, 2048), (2, 4, 4, 512),
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
......@@ -73,11 +85,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 299
depth_multiplier = 0.5**12
pad_to_multiple = 1
expected_feature_map_shape = [(4, 35, 35, 128), (4, 17, 17, 128),
(4, 8, 8, 192), (4, 4, 4, 32),
(4, 2, 2, 32), (4, 1, 1, 32)]
expected_feature_map_shape = [(2, 35, 35, 128), (2, 17, 17, 128),
(2, 8, 8, 192), (2, 4, 4, 32),
(2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
......@@ -85,11 +97,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 299
depth_multiplier = 1.0
pad_to_multiple = 32
expected_feature_map_shape = [(4, 37, 37, 288), (4, 18, 18, 768),
(4, 8, 8, 2048), (4, 4, 4, 512),
(4, 2, 2, 256), (4, 1, 1, 128)]
expected_feature_map_shape = [(2, 37, 37, 288), (2, 18, 18, 768),
(2, 8, 8, 2048), (2, 4, 4, 512),
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self):
......
......@@ -20,6 +20,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import mobilenet_v1
slim = tf.contrib.slim
......@@ -35,7 +36,9 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None):
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""MobileNetV1 Feature Extractor for SSD Models.
Args:
......@@ -50,10 +53,14 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
"""
super(SSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights)
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -81,34 +88,33 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.'])
preprocessed_inputs = shape_utils.check_min_image_dim(
33, preprocessed_inputs)
feature_map_layout = {
'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
'', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
}
with tf.control_dependencies([shape_assert]):
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope([slim.batch_norm], fused=False):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
with slim.arg_scope(self._conv_hyperparams):
# TODO: Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
......@@ -24,7 +24,7 @@ slim = tf.contrib.slim
class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True):
......@@ -52,11 +52,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(4, 8, 8, 512), (4, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)]
expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self):
......@@ -64,11 +64,23 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 299
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 512), (4, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)]
expected_feature_map_shape = [(2, 19, 19, 512), (2, 10, 10, 1024),
(2, 5, 5, 512), (2, 3, 3, 256),
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_with_dynamic_image_shape(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
......@@ -76,11 +88,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 299
depth_multiplier = 0.5**12
pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 32), (4, 10, 10, 32),
(4, 5, 5, 32), (4, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)]
expected_feature_map_shape = [(2, 19, 19, 32), (2, 10, 10, 32),
(2, 5, 5, 32), (2, 3, 3, 32),
(2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
......@@ -88,11 +100,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 299
depth_multiplier = 1.0
pad_to_multiple = 32
expected_feature_map_shape = [(4, 20, 20, 512), (4, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)]
expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 1024),
(2, 5, 5, 512), (2, 3, 3, 256),
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self):
......@@ -108,7 +120,7 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3)
test_image = np.random.rand(2, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image)
......
"""SSD Feature Pyramid Network (FPN) feature extractors based on Resnet v1.
See https://arxiv.org/abs/1708.02002 for details.
"""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import resnet_v1
slim = tf.contrib.slim
class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD FPN feature extractor based on Resnet v1 architecture."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
resnet_base_fn,
resnet_scope_name,
fpn_scope_name,
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""SSD FPN feature extractor based on Resnet v1 architecture.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
resnet_base_fn: base resnet network to use.
resnet_scope_name: scope name under which to construct resnet
fpn_scope_name: scope name under which to construct the feature pyramid
network.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
Raises:
ValueError: On supplying invalid arguments for unused arguments.
"""
super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding)
if self._depth_multiplier != 1.0:
raise ValueError('Only depth 1.0 is supported, found: {}'.
format(self._depth_multiplier))
if self._use_explicit_padding is True:
raise ValueError('Explicit padding is not a valid option.')
self._resnet_base_fn = resnet_base_fn
self._resnet_scope_name = resnet_scope_name
self._fpn_scope_name = fpn_scope_name
def preprocess(self, resized_inputs):
"""SSD preprocessing.
VGG style channel mean subtraction as described here:
https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-mdnge.
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
def _filter_features(self, image_features):
# TODO: Change resnet endpoint to strip scope prefixes instead
# of munging the scope here.
filtered_image_features = dict({})
for key, feature in image_features.items():
feature_name = key.split('/')[-1]
if feature_name in ['block2', 'block3', 'block4']:
filtered_image_features[feature_name] = feature
return filtered_image_features
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
Raises:
ValueError: depth multiplier is not supported.
"""
if self._depth_multiplier != 1.0:
raise ValueError('Depth multiplier not supported.')
preprocessed_inputs = shape_utils.check_min_image_dim(
129, preprocessed_inputs)
with tf.variable_scope(
self._resnet_scope_name, reuse=self._reuse_weights) as scope:
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
_, image_features = self._resnet_base_fn(
inputs=ops.pad_to_multiple(preprocessed_inputs,
self._pad_to_multiple),
num_classes=None,
is_training=self._is_training and self._batch_norm_trainable,
global_pool=False,
output_stride=None,
store_non_strided_activations=True,
scope=scope)
image_features = self._filter_features(image_features)
last_feature_map = image_features['block4']
with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
with slim.arg_scope(self._conv_hyperparams):
for i in range(5, 7):
last_feature_map = slim.conv2d(
last_feature_map,
num_outputs=256,
kernel_size=[3, 3],
stride=2,
padding='SAME',
scope='block{}'.format(i))
image_features['bottomup_{}'.format(i)] = last_feature_map
feature_maps = feature_map_generators.fpn_top_down_feature_maps(
[
image_features[key] for key in
['block2', 'block3', 'block4', 'bottomup_5', 'bottomup_6']
],
depth=256,
scope='top_down_features')
return feature_maps.values()
class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""Resnet50 v1 FPN Feature Extractor for SSD Models.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
"""
super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding)
class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""Resnet101 v1 FPN Feature Extractor for SSD Models.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
"""
super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding)
class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""Resnet152 v1 FPN Feature Extractor for SSD Models.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
"""
super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment