Unverified Commit fd7b6887 authored by Jonathan Huang's avatar Jonathan Huang Committed by GitHub
Browse files

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection 
parents f98ec55e 1efe98bb
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs `Experiment` for object detection model.
This uses the TF.learn framework to define and run an object detection model
wrapped in an `Estimator`.
Note that this module is only compatible with SSD Meta architecture at the
moment.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
import tensorflow as tf
from google.protobuf import text_format
from tensorflow.contrib.learn.python.learn import learn_runner
from tensorflow.contrib.tpu.python.tpu import tpu_optimizer
from object_detection import eval_util
from object_detection import inputs
from object_detection import model_hparams
from object_detection.builders import model_builder
from object_detection.builders import optimizer_builder
from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.utils import config_util
from object_detection.utils import label_map_util
from object_detection.utils import shape_utils
from object_detection.utils import variables_helper
from object_detection.utils import visualization_utils as vis_utils
# Command-line flags for the training/evaluation driver.
tf.flags.DEFINE_string('model_dir', None, 'Path to output model directory '
                       'where event and checkpoint files will be written.')
tf.flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
                       'file.')
tf.flags.DEFINE_integer('num_train_steps', 500000, 'Number of train steps.')
# BUGFIX: help string previously said "train steps" (copy-paste error).
tf.flags.DEFINE_integer('num_eval_steps', 10000, 'Number of eval steps.')
FLAGS = tf.flags.FLAGS
def _get_groundtruth_data(detection_model, class_agnostic):
  """Collects groundtruth tensors registered on `detection_model`.

  Args:
    detection_model: A `DetectionModel` object.
    class_agnostic: Whether the detections are class_agnostic.

  Returns:
    A dictionary keyed by `fields.InputDataFields` names containing:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_masks': 3D float32 tensor of instance masks, present only
        when masks were provided as groundtruth.
  """
  input_fields = fields.InputDataFields()
  boxes = detection_model.groundtruth_lists(fields.BoxListFields.boxes)[0]
  if class_agnostic:
    # For class-agnostic models the one-hot encoding collapses to a single
    # all-ones column.
    num_boxes = tf.shape(boxes)[0]
    one_hot_classes = tf.ones([num_boxes, 1])
  else:
    one_hot_classes = detection_model.groundtruth_lists(
        fields.BoxListFields.classes)[0]
  # Shift the 0-indexed argmax to 1-indexed class labels (b/63711816).
  label_id_offset = 1
  class_labels = tf.argmax(one_hot_classes, axis=1) + label_id_offset
  groundtruth = {
      input_fields.groundtruth_boxes: boxes,
      input_fields.groundtruth_classes: class_labels,
  }
  if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
    groundtruth[input_fields.groundtruth_instance_masks] = (
        detection_model.groundtruth_lists(fields.BoxListFields.masks)[0])
  return groundtruth
def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
  """Unstacks all tensors in `tensor_dict` along 0th dimension.

  Unstacks tensor from the tensor dict along 0th dimension and returns a
  tensor_dict containing values that are lists of unstacked tensors.

  Tensors in the `tensor_dict` are expected to be of one of the three shapes:
  1. [batch_size]
  2. [batch_size, height, width, channels]
  3. [batch_size, num_boxes, d1, d2, ... dn]

  When unpad_groundtruth_tensors is true, unstacked tensors of form 3 above
  are sliced along the `num_boxes` dimension using the value in tensor
  field.InputDataFields.num_groundtruth_boxes.

  Note that this function has a static list of input data fields and has to be
  kept in sync with the InputDataFields defined in core/standard_fields.py

  Args:
    tensor_dict: A dictionary of batched groundtruth tensors.
    unpad_groundtruth_tensors: Whether to remove padding along `num_boxes`
      dimension of the groundtruth tensors.

  Returns:
    A dictionary where the keys are from fields.InputDataFields and values are
    a list of unstacked (optionally unpadded) tensors.

  Raises:
    ValueError: If unpad_groundtruth_tensors is True and `tensor_dict` does
      not contain `num_groundtruth_boxes` tensor.
  """
  unbatched = {key: tf.unstack(batched)
               for key, batched in tensor_dict.items()}
  if not unpad_groundtruth_tensors:
    return unbatched

  num_boxes_key = fields.InputDataFields.num_groundtruth_boxes
  if num_boxes_key not in unbatched:
    raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. '
                     'Keys available: {}'.format(
                         unbatched.keys()))
  # Input data fields padded along the num_boxes dimension. Must be kept in
  # sync with InputDataFields in standard_fields.py.
  padded_fields = {
      fields.InputDataFields.groundtruth_instance_masks,
      fields.InputDataFields.groundtruth_classes,
      fields.InputDataFields.groundtruth_boxes,
      fields.InputDataFields.groundtruth_keypoints,
      fields.InputDataFields.groundtruth_group_of,
      fields.InputDataFields.groundtruth_difficult,
      fields.InputDataFields.groundtruth_is_crowd,
      fields.InputDataFields.groundtruth_area,
      fields.InputDataFields.groundtruth_weights,
  }
  for key in padded_fields & set(unbatched):
    unpadded_tensors = []
    for num_gt, padded in zip(unbatched[num_boxes_key], unbatched[key]):
      padded_shape = shape_utils.combined_static_and_dynamic_shape(padded)
      # Slice [0:num_gt] on the boxes axis; keep every other axis whole
      # (-1 marks dynamic dims).
      begin = tf.zeros([len(padded_shape)], dtype=tf.int32)
      size = tf.stack(
          [num_gt] + [-1 if dim is None else dim for dim in padded_shape[1:]])
      unpadded_tensors.append(tf.slice(padded, begin, size))
    unbatched[key] = unpadded_tensors
  return unbatched
def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
  """Creates a model function for `Estimator`.

  Args:
    detection_model_fn: Function that returns a `DetectionModel` instance.
    configs: Dictionary of pipeline config objects.
    hparams: `HParams` object.
    use_tpu: Boolean indicating whether model should be constructed for
      use on TPU.

  Returns:
    `model_fn` for `Estimator`.
  """
  train_config = configs['train_config']
  eval_input_config = configs['eval_input_config']

  def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    scaffold_fn = None

    if mode == tf.estimator.ModeKeys.TRAIN:
      labels = unstack_batch(
          labels,
          unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
    elif mode == tf.estimator.ModeKeys.EVAL:
      # Eval uses batch size 1 and keeps padded tensors intact.
      labels = unstack_batch(labels, unpad_groundtruth_tensors=False)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
      gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
      gt_masks_list = None
      if fields.InputDataFields.groundtruth_instance_masks in labels:
        gt_masks_list = labels[
            fields.InputDataFields.groundtruth_instance_masks]
      gt_keypoints_list = None
      if fields.InputDataFields.groundtruth_keypoints in labels:
        gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
      detection_model.provide_groundtruth(
          groundtruth_boxes_list=gt_boxes_list,
          groundtruth_classes_list=gt_classes_list,
          groundtruth_masks_list=gt_masks_list,
          groundtruth_keypoints_list=gt_keypoints_list)

    preprocessed_images = features[fields.InputDataFields.image]
    prediction_dict = detection_model.predict(
        preprocessed_images, features[fields.InputDataFields.true_image_shape])
    detections = detection_model.postprocess(
        prediction_dict, features[fields.InputDataFields.true_image_shape])

    if mode == tf.estimator.ModeKeys.TRAIN:
      if train_config.fine_tune_checkpoint and hparams.load_pretrained:
        # Restore variables that are available in the fine-tune checkpoint;
        # variables without a counterpart keep their fresh initialization.
        asg_map = detection_model.restore_map(
            from_detection_checkpoint=train_config.from_detection_checkpoint,
            load_all_detection_checkpoint_vars=(
                train_config.load_all_detection_checkpoint_vars))
        available_var_map = (
            variables_helper.get_variables_available_in_checkpoint(
                asg_map, train_config.fine_tune_checkpoint,
                include_global_step=False))
        if use_tpu:
          # TPUEstimator requires checkpoint restoration to happen inside a
          # scaffold function.
          def tpu_scaffold():
            tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                          available_var_map)
            return tf.train.Scaffold()
          scaffold_fn = tpu_scaffold
        else:
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      losses_dict = detection_model.loss(
          prediction_dict, features[fields.InputDataFields.true_image_shape])
      # BUGFIX: `dict.itervalues()` does not exist on Python 3 and would raise
      # AttributeError; `list(values())` works on both Python 2 and 3.
      losses = list(losses_dict.values())
      total_loss = tf.add_n(losses, name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
      global_step = tf.train.get_or_create_global_step()
      training_optimizer, optimizer_summary_vars = optimizer_builder.build(
          train_config.optimizer)
      if use_tpu:
        training_optimizer = tpu_optimizer.CrossShardOptimizer(
            training_optimizer)
      # Optionally freeze some layers by setting their gradients to be zero.
      trainable_variables = None
      if train_config.freeze_variables:
        trainable_variables = tf.contrib.framework.filter_variables(
            tf.trainable_variables(),
            exclude_patterns=train_config.freeze_variables)
      clip_gradients_value = None
      if train_config.gradient_clipping_by_norm > 0:
        clip_gradients_value = train_config.gradient_clipping_by_norm
      if not use_tpu:
        for var in optimizer_summary_vars:
          tf.summary.scalar(var.op.name, var)
      # Summaries are not supported on TPU; pass an empty list to suppress
      # the defaults optimize_loss would otherwise add.
      summaries = [] if use_tpu else None
      train_op = tf.contrib.layers.optimize_loss(
          loss=total_loss,
          global_step=global_step,
          learning_rate=None,
          clip_gradients=clip_gradients_value,
          optimizer=training_optimizer,
          variables=trainable_variables,
          summaries=summaries,
          name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
      export_outputs = {
          tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
              tf.estimator.export.PredictOutput(detections)
      }

    eval_metric_ops = None
    if mode == tf.estimator.ModeKeys.EVAL:
      # Detection summaries during eval.
      class_agnostic = (fields.DetectionResultFields.detection_classes
                        not in detections)
      groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
      eval_dict = eval_util.result_dict_for_single_example(
          tf.expand_dims(features[fields.InputDataFields.original_image][0], 0),
          features[inputs.HASH_KEY][0],
          detections,
          groundtruth,
          class_agnostic=class_agnostic,
          scale_to_absolute=False)
      if class_agnostic:
        category_index = label_map_util.create_class_agnostic_category_index()
      else:
        category_index = label_map_util.create_category_index_from_labelmap(
            eval_input_config.label_map_path)
      detection_and_groundtruth = vis_utils.draw_side_by_side_evaluation_image(
          eval_dict, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)
      if not use_tpu:
        tf.summary.image('Detections_Left_Groundtruth_Right',
                         detection_and_groundtruth)
      # Eval metrics on a single image.
      detection_fields = fields.DetectionResultFields()
      input_data_fields = fields.InputDataFields()
      coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
          category_index.values())
      eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
          image_id=eval_dict[input_data_fields.key],
          groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes],
          groundtruth_classes=eval_dict[input_data_fields.groundtruth_classes],
          detection_boxes=eval_dict[detection_fields.detection_boxes],
          detection_scores=eval_dict[detection_fields.detection_scores],
          detection_classes=eval_dict[detection_fields.detection_classes])

    if use_tpu:
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metrics=eval_metric_ops,
          export_outputs=export_outputs)
    else:
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs)

  return model_fn
def _build_experiment_fn(train_steps, eval_steps):
  """Returns a function that creates an `Experiment`."""

  def build_experiment(run_config, hparams):
    """Builds an `Experiment` from configuration and hyperparameters.

    Args:
      run_config: A `RunConfig`.
      hparams: A `HParams`.

    Returns:
      An `Experiment` object.
    """
    # Step counts are closed over; everything else comes from the flags-level
    # pipeline config.
    return populate_experiment(
        run_config,
        hparams,
        FLAGS.pipeline_config_path,
        train_steps,
        eval_steps)

  return build_experiment
def populate_experiment(run_config,
                        hparams,
                        pipeline_config_path,
                        train_steps=None,
                        eval_steps=None,
                        model_fn_creator=create_model_fn,
                        **kwargs):
  """Populates an `Experiment` object.

  Args:
    run_config: A `RunConfig`.
    hparams: A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    train_steps: Number of training steps. If None, the number of training
      steps is set from the `TrainConfig` proto.
    eval_steps: Number of evaluation steps per evaluation cycle. If None, the
      number of evaluation steps is set from the `EvalConfig` proto.
    model_fn_creator: A function that creates a `model_fn` for `Estimator`.
      Follows the signature:
      * Args:
        * `detection_model_fn`: Function that returns `DetectionModel`
          instance.
        * `configs`: Dictionary of pipeline config objects.
        * `hparams`: `HParams` object.
      * Returns:
        `model_fn` for `Estimator`.
    **kwargs: Additional keyword arguments for configuration override.

  Returns:
    An `Experiment` that defines all aspects of training, evaluation, and
    export.
  """
  configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
  configs = config_util.merge_external_params_with_configs(
      configs,
      hparams,
      train_steps=train_steps,
      eval_steps=eval_steps,
      **kwargs)

  model_config = configs['model']
  train_config = configs['train_config']
  eval_config = configs['eval_config']

  # Fall back to the step counts in the pipeline config when the caller did
  # not supply them; a zero in the proto means "unspecified".
  if train_steps is None:
    train_steps = train_config.num_steps if train_config.num_steps else None
  if eval_steps is None:
    eval_steps = eval_config.num_examples if eval_config.num_examples else None

  detection_model_fn = functools.partial(
      model_builder.build, model_config=model_config)

  # Input functions for TRAIN and EVAL.
  train_input_fn = inputs.create_train_input_fn(
      train_config=train_config,
      train_input_config=configs['train_input_config'],
      model_config=model_config)
  eval_input_fn = inputs.create_eval_input_fn(
      eval_config=eval_config,
      eval_input_config=configs['eval_input_config'],
      model_config=model_config)

  export_strategies = [
      tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
          serving_input_fn=inputs.create_predict_input_fn(
              model_config=model_config))
  ]

  estimator = tf.estimator.Estimator(
      model_fn=model_fn_creator(detection_model_fn, configs, hparams),
      config=run_config)

  if run_config.is_chief:
    # Persist the as-run pipeline config next to the checkpoints for
    # traceability.
    final_pipeline_proto = config_util.create_pipeline_proto_from_configs(
        configs)
    final_config_path = os.path.join(estimator.model_dir, 'pipeline.config')
    serialized_config = text_format.MessageToString(final_pipeline_proto)
    with tf.gfile.Open(final_config_path, 'wb') as f:
      tf.logging.info('Writing as-run pipeline config file to %s',
                      final_config_path)
      f.write(serialized_config)

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      train_steps=train_steps,
      eval_steps=eval_steps,
      export_strategies=export_strategies,
      eval_delay_secs=120,)
def main(unused_argv):
  """Entry point: builds the experiment from flags and runs it."""
  tf.flags.mark_flag_as_required('model_dir')
  tf.flags.mark_flag_as_required('pipeline_config_path')
  run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.model_dir)
  experiment_fn = _build_experiment_fn(FLAGS.num_train_steps,
                                       FLAGS.num_eval_steps)
  learn_runner.run(
      experiment_fn=experiment_fn,
      run_config=run_config,
      hparams=model_hparams.create_hparams())


if __name__ == '__main__':
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Hyperparameters for the object detection model in TF.learn.
This file consolidates and documents the hyperparameters used by the model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def create_hparams(hparams_overrides=None):
  """Returns hyperparameters, including any flag value overrides.

  Args:
    hparams_overrides: Optional hparams overrides, represented as a
      string containing comma-separated hparam_name=value pairs.

  Returns:
    The hyperparameters as a tf.HParams object.
  """
  hparams = tf.contrib.training.HParams(
      # load_pretrained: whether the fine-tuning checkpoint named in the
      # pipeline config should be restored before training.
      load_pretrained=True)
  # Apply caller-supplied overrides on top of the defaults above.
  if hparams_overrides:
    hparams = hparams.parse(hparams_overrides)
  return hparams
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object detection model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
import numpy as np
import tensorflow as tf
from object_detection import inputs
from object_detection import model
from object_detection import model_hparams
from object_detection import model_test_util
from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
FLAGS = tf.flags.FLAGS
# The SSD Inception pets config serves as the representative model under test.
MODEL_NAME_FOR_TEST = model_test_util.SSD_INCEPTION_MODEL_NAME
def _get_data_path():
  """Returns an absolute path to TFRecord file."""
  test_base = os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE)
  return os.path.join(test_base, 'test_data', 'pets_examples.record')
def _get_labelmap_path():
  """Returns an absolute path to label map file."""
  test_base = os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE)
  return os.path.join(test_base, 'data', 'pet_label_map.pbtxt')
def _get_configs_for_model(model_name):
  """Returns configurations for model."""
  pipeline_config_file = model_test_util.GetPipelineConfigPath(model_name)
  record_path = _get_data_path()
  labelmap_path = _get_labelmap_path()
  configs = config_util.get_configs_from_pipeline_file(pipeline_config_file)
  # Point both input readers at the bundled test TFRecord and label map.
  return config_util.merge_external_params_with_configs(
      configs,
      train_input_path=record_path,
      eval_input_path=record_path,
      label_map_path=labelmap_path)
def setUpModule():
  # Point the model_dir / pipeline_config_path flags at the model under test
  # before any test in this module runs.
  model_test_util.InitializeFlags(MODEL_NAME_FOR_TEST)
class ModelTflearnTest(tf.test.TestCase):
  """Smoke tests for the TF.learn object detection `model_fn`."""

  @classmethod
  def setUpClass(cls):
    # Start from a clean graph so state from other modules cannot leak in.
    tf.reset_default_graph()

  def _assert_outputs_for_train_eval(self, configs, mode, class_agnostic=False):
    """Builds the model_fn in `mode` and checks its `EstimatorSpec` outputs.

    Args:
      configs: Dictionary of pipeline config objects.
      mode: tf.estimator.ModeKeys.TRAIN or EVAL.
      class_agnostic: If True, expects no 'detection_classes' prediction.

    Returns:
      The `EstimatorSpec` produced by the model function.
    """
    model_config = configs['model']
    train_config = configs['train_config']
    with tf.Graph().as_default():
      if mode == tf.estimator.ModeKeys.TRAIN:
        features, labels = inputs.create_train_input_fn(
            configs['train_config'],
            configs['train_input_config'],
            configs['model'])()
        batch_size = train_config.batch_size
      else:
        features, labels = inputs.create_eval_input_fn(
            configs['eval_config'],
            configs['eval_input_config'],
            configs['model'])()
        # Eval input pipeline always produces a batch of one example.
        batch_size = 1
      detection_model_fn = functools.partial(
          model_builder.build, model_config=model_config, is_training=True)
      # Disable checkpoint loading so the test needs no pretrained weights.
      hparams = model_hparams.create_hparams(
          hparams_overrides='load_pretrained=false')
      model_fn = model.create_model_fn(detection_model_fn, configs, hparams)
      estimator_spec = model_fn(features, labels, mode)
      self.assertIsNotNone(estimator_spec.loss)
      self.assertIsNotNone(estimator_spec.predictions)
      if class_agnostic:
        self.assertNotIn('detection_classes', estimator_spec.predictions)
      else:
        detection_classes = estimator_spec.predictions['detection_classes']
        self.assertEqual(batch_size, detection_classes.shape.as_list()[0])
        self.assertEqual(tf.float32, detection_classes.dtype)
      # Boxes/scores/num_detections are produced regardless of agnosticism.
      detection_boxes = estimator_spec.predictions['detection_boxes']
      detection_scores = estimator_spec.predictions['detection_scores']
      num_detections = estimator_spec.predictions['num_detections']
      self.assertEqual(batch_size, detection_boxes.shape.as_list()[0])
      self.assertEqual(tf.float32, detection_boxes.dtype)
      self.assertEqual(batch_size, detection_scores.shape.as_list()[0])
      self.assertEqual(tf.float32, detection_scores.dtype)
      self.assertEqual(tf.float32, num_detections.dtype)
      if mode == tf.estimator.ModeKeys.TRAIN:
        self.assertIsNotNone(estimator_spec.train_op)
      return estimator_spec

  def _assert_outputs_for_predict(self, configs):
    """Builds the model_fn in PREDICT mode and checks serving outputs."""
    model_config = configs['model']
    with tf.Graph().as_default():
      features, _ = inputs.create_eval_input_fn(
          configs['eval_config'],
          configs['eval_input_config'],
          configs['model'])()
      detection_model_fn = functools.partial(
          model_builder.build, model_config=model_config, is_training=False)
      hparams = model_hparams.create_hparams(
          hparams_overrides='load_pretrained=false')
      model_fn = model.create_model_fn(detection_model_fn, configs, hparams)
      estimator_spec = model_fn(features, None, tf.estimator.ModeKeys.PREDICT)
      # PREDICT mode has no loss/train_op, only predictions + export outputs.
      self.assertIsNone(estimator_spec.loss)
      self.assertIsNone(estimator_spec.train_op)
      self.assertIsNotNone(estimator_spec.predictions)
      self.assertIsNotNone(estimator_spec.export_outputs)
      self.assertIn(tf.saved_model.signature_constants.PREDICT_METHOD_NAME,
                    estimator_spec.export_outputs)

  def testModelFnInTrainMode(self):
    """Tests the model function in TRAIN mode."""
    configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
    self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.TRAIN)

  def testModelFnInEvalMode(self):
    """Tests the model function in EVAL mode."""
    configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
    self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.EVAL)

  def testModelFnInPredictMode(self):
    """Tests the model function in PREDICT mode."""
    configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
    self._assert_outputs_for_predict(configs)

  def testExperiment(self):
    """Tests that the `Experiment` object is constructed correctly."""
    experiment = model_test_util.BuildExperiment()
    # The chief writes the as-run pipeline config into the model directory.
    model_dir = experiment.estimator.model_dir
    pipeline_config_path = os.path.join(model_dir, 'pipeline.config')
    self.assertTrue(tf.gfile.Exists(pipeline_config_path))
class UnbatchTensorsTest(tf.test.TestCase):
  """Tests for `model.unstack_batch` with and without unpadding."""

  def test_unbatch_without_unpadding(self):
    # Batch of 2; box dimensions left dynamic so unstacking alone is tested.
    image_placeholder = tf.placeholder(tf.float32, [2, None, None, None])
    groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, None, None])
    groundtruth_classes_placeholder = tf.placeholder(tf.float32,
                                                     [2, None, None])
    groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, None])
    tensor_dict = {
        fields.InputDataFields.image:
            image_placeholder,
        fields.InputDataFields.groundtruth_boxes:
            groundtruth_boxes_placeholder,
        fields.InputDataFields.groundtruth_classes:
            groundtruth_classes_placeholder,
        fields.InputDataFields.groundtruth_weights:
            groundtruth_weights_placeholder
    }
    unbatched_tensor_dict = model.unstack_batch(
        tensor_dict, unpad_groundtruth_tensors=False)
    with self.test_session() as sess:
      unbatched_tensor_dict_out = sess.run(
          unbatched_tensor_dict,
          feed_dict={
              image_placeholder:
                  np.random.rand(2, 4, 4, 3).astype(np.float32),
              groundtruth_boxes_placeholder:
                  np.random.rand(2, 5, 4).astype(np.float32),
              groundtruth_classes_placeholder:
                  np.random.rand(2, 5, 6).astype(np.float32),
              groundtruth_weights_placeholder:
                  np.random.rand(2, 5).astype(np.float32)
          })
    # Without unpadding, each unstacked tensor keeps the full num_boxes=5.
    for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]:
      self.assertAllEqual(image_out.shape, [4, 4, 3])
    for groundtruth_boxes_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_boxes]:
      self.assertAllEqual(groundtruth_boxes_out.shape, [5, 4])
    for groundtruth_classes_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_classes]:
      self.assertAllEqual(groundtruth_classes_out.shape, [5, 6])
    for groundtruth_weights_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_weights]:
      self.assertAllEqual(groundtruth_weights_out.shape, [5])

  def test_unbatch_and_unpad_groundtruth_tensors(self):
    # Padded num_boxes is 5; num_groundtruth_boxes says only 3 are real.
    image_placeholder = tf.placeholder(tf.float32, [2, None, None, None])
    groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, 5, None])
    groundtruth_classes_placeholder = tf.placeholder(tf.float32, [2, 5, None])
    groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, 5])
    num_groundtruth_placeholder = tf.placeholder(tf.int32, [2])
    tensor_dict = {
        fields.InputDataFields.image:
            image_placeholder,
        fields.InputDataFields.groundtruth_boxes:
            groundtruth_boxes_placeholder,
        fields.InputDataFields.groundtruth_classes:
            groundtruth_classes_placeholder,
        fields.InputDataFields.groundtruth_weights:
            groundtruth_weights_placeholder,
        fields.InputDataFields.num_groundtruth_boxes:
            num_groundtruth_placeholder
    }
    unbatched_tensor_dict = model.unstack_batch(
        tensor_dict, unpad_groundtruth_tensors=True)
    with self.test_session() as sess:
      unbatched_tensor_dict_out = sess.run(
          unbatched_tensor_dict,
          feed_dict={
              image_placeholder:
                  np.random.rand(2, 4, 4, 3).astype(np.float32),
              groundtruth_boxes_placeholder:
                  np.random.rand(2, 5, 4).astype(np.float32),
              groundtruth_classes_placeholder:
                  np.random.rand(2, 5, 6).astype(np.float32),
              groundtruth_weights_placeholder:
                  np.random.rand(2, 5).astype(np.float32),
              num_groundtruth_placeholder:
                  np.array([3, 3], np.int32)
          })
    # Groundtruth tensors are sliced down to 3 boxes; images are untouched.
    for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]:
      self.assertAllEqual(image_out.shape, [4, 4, 3])
    for groundtruth_boxes_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_boxes]:
      self.assertAllEqual(groundtruth_boxes_out.shape, [3, 4])
    for groundtruth_classes_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_classes]:
      self.assertAllEqual(groundtruth_classes_out.shape, [3, 6])
    for groundtruth_weights_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_weights]:
      self.assertAllEqual(groundtruth_weights_out.shape, [3])
# Entry point for running this test module directly.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utils for tests for object detection tflearn model."""
from __future__ import absolute_import
import os
import tempfile
import tensorflow as tf
from object_detection import model
from object_detection import model_hparams
FLAGS = tf.flags.FLAGS
# Names of the sample pipeline configs used by the tests.
FASTER_RCNN_MODEL_NAME = 'faster_rcnn_resnet50_pets'
SSD_INCEPTION_MODEL_NAME = 'ssd_inception_v2_pets'
# Source-tree prefix under test_srcdir where object_detection data lives.
PATH_BASE = 'google3/third_party/tensorflow_models/object_detection/'
def GetPipelineConfigPath(model_name):
  """Returns path to the local pipeline config file."""
  config_filename = model_name + '.config'
  return os.path.join(FLAGS.test_srcdir, PATH_BASE, 'samples', 'configs',
                      config_filename)
def InitializeFlags(model_name_for_test):
  """Points model_dir and pipeline_config_path flags at the test model."""
  # A fresh temp dir keeps each test run's checkpoints isolated.
  FLAGS.model_dir = tempfile.mkdtemp()
  FLAGS.pipeline_config_path = GetPipelineConfigPath(model_name_for_test)
def BuildExperiment():
  """Builds an Experiment object for testing purposes."""
  # Skip pretrained-checkpoint loading; tests have no checkpoint available.
  hparams = model_hparams.create_hparams(
      hparams_overrides='load_pretrained=false')
  # pylint: disable=protected-access
  experiment_fn = model._build_experiment_fn(10, 10)
  # pylint: enable=protected-access
  run_config = tf.contrib.learn.RunConfig()
  return experiment_fn(run_config, hparams)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs `Estimator` for object detection model on TPUs.
This uses the TPUEstimator API to define and run a model in TRAIN/EVAL modes.
"""
# pylint: enable=line-too-long
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu import tpu_config
from tensorflow.contrib.tpu.python.tpu import tpu_estimator
from tensorflow.contrib.training.python.training import evaluation
from object_detection import inputs
from object_detection import model
from object_detection import model_hparams
from object_detection.builders import model_builder
from object_detection.utils import config_util
# Hardware selection: set to False to debug the model_fn on plain CPUs.
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')

# Cloud TPU Cluster Resolvers
tf.flags.DEFINE_string(
    'gcp_project',
    default=None,
    help='Project name for the Cloud TPU-enabled project. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
    'tpu_zone',
    default=None,
    help='GCE zone where the Cloud TPU is located in. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
    'tpu_name',
    default=None,
    help='Name of the Cloud TPU for Cluster Resolvers. You must specify either '
    'this flag or --master.')
tf.flags.DEFINE_string(
    'master', default=None,
    help='GRPC URL of the master (e.g. grpc://ip.address.of.tpu:8470). You '
    'must specify either this flag or --tpu_name.')

# TPU sharding and training-loop configuration.
tf.flags.DEFINE_integer('num_shards', 8, 'Number of shards (TPU cores).')
tf.flags.DEFINE_integer('iterations_per_loop', 100,
                        'Number of iterations per TPU training loop.')

# For mode=train_and_eval, evaluation occurs after training is finished.
# Note: independently of steps_per_checkpoint, estimator will save the most
# recent checkpoint every 10 minutes by default for train_and_eval
tf.flags.DEFINE_string('mode', 'train_and_eval',
                       'Mode to run: train, eval, train_and_eval')
# Default assumes 8 cores with a per-core batch of 32.
tf.flags.DEFINE_integer('train_batch_size', 32 * 8, 'Batch size for training.')

# For EVAL.
tf.flags.DEFINE_integer('min_eval_interval_secs', 180,
                        'Minimum seconds between evaluations.')
tf.flags.DEFINE_integer(
    'eval_timeout_secs', None,
    'Maximum seconds between checkpoints before evaluation terminates.')

FLAGS = tf.flags.FLAGS
def create_estimator(run_config,
                     hparams,
                     pipeline_config_path,
                     train_steps=None,
                     eval_steps=None,
                     train_batch_size=None,
                     model_fn_creator=model.create_model_fn,
                     use_tpu=False,
                     num_shards=1,
                     params=None,
                     **kwargs):
  """Builds a `TPUEstimator` together with input functions and step counts.

  Args:
    run_config: A `RunConfig` for the estimator.
    hparams: A `HParams` object carrying configuration overrides.
    pipeline_config_path: Path to a pipeline config proto file.
    train_steps: Number of training steps; when None, taken from the
      `TrainConfig` proto.
    eval_steps: Number of evaluation steps per cycle; when None, taken from
      the `EvalConfig` proto.
    train_batch_size: Training batch size; when None, taken from the
      `TrainConfig` proto.
    model_fn_creator: Callable that builds the estimator `model_fn`.
      Invoked as `model_fn_creator(detection_model_fn, configs, hparams,
      use_tpu)`.
    use_tpu: Whether training and evaluation should run on TPU.
    num_shards: Number of TPU shards (cores).
    params: Optional parameter dictionary forwarded to the estimator.
    **kwargs: Extra keyword configuration overrides.

  Returns:
    A 5-tuple of (estimator, train_input_fn, eval_input_fn, train_steps,
    eval_steps).
  """
  pipeline_configs = config_util.get_configs_from_pipeline_file(
      pipeline_config_path)
  # Fold hparams / explicit args / kwargs into the proto-derived configs.
  pipeline_configs = config_util.merge_external_params_with_configs(
      pipeline_configs,
      hparams,
      train_steps=train_steps,
      eval_steps=eval_steps,
      batch_size=train_batch_size,
      **kwargs)

  model_config = pipeline_configs['model']
  train_config = pipeline_configs['train_config']
  eval_config = pipeline_configs['eval_config']

  params = {} if params is None else params
  if train_steps is None:
    # A zero/unset num_steps in the proto maps to None (no explicit limit).
    train_steps = train_config.num_steps or None
  if eval_steps is None:
    eval_steps = eval_config.num_examples or None

  detection_model_fn = functools.partial(
      model_builder.build, model_config=model_config)

  # Input pipelines for TRAIN and EVAL modes.
  train_input_fn = inputs.create_train_input_fn(
      train_config=train_config,
      train_input_config=pipeline_configs['train_input_config'],
      model_config=model_config)
  eval_input_fn = inputs.create_eval_input_fn(
      eval_config=eval_config,
      eval_input_config=pipeline_configs['eval_input_config'],
      model_config=model_config)

  # For each core, only batch size 1 is supported for eval.
  eval_batch_size = num_shards * 1 if use_tpu else 1
  estimator = tpu_estimator.TPUEstimator(
      model_fn=model_fn_creator(detection_model_fn, pipeline_configs, hparams,
                                use_tpu),
      train_batch_size=train_config.batch_size,
      eval_batch_size=eval_batch_size,
      use_tpu=use_tpu,
      config=run_config,
      params=params)
  return estimator, train_input_fn, eval_input_fn, train_steps, eval_steps
def main(unused_argv):
  """Resolves the TPU master, builds the estimator, and runs the chosen mode.

  Behavior depends on FLAGS.mode: 'train' trains only; 'train_and_eval'
  trains then evaluates once; 'eval' loops, evaluating each new checkpoint
  until the final training step's checkpoint is seen or the iterator times
  out.
  """
  # These flags are defined elsewhere (presumably in the imported `model`
  # module) — marking them required here fails fast on missing values.
  tf.flags.mark_flag_as_required('model_dir')
  tf.flags.mark_flag_as_required('pipeline_config_path')

  # Exactly one way to locate the TPU must be given.
  if FLAGS.master is None and FLAGS.tpu_name is None:
    raise RuntimeError('You must specify either --master or --tpu_name.')

  if FLAGS.master is not None:
    # An explicit master address wins over a named TPU.
    if FLAGS.tpu_name is not None:
      tf.logging.warn('Both --master and --tpu_name are set. Ignoring '
                      '--tpu_name and using --master.')
    tpu_grpc_url = FLAGS.master
  else:
    # Resolve the gRPC URL from the Cloud TPU name/zone/project.
    # NOTE(review): this deep tf.contrib path is TF-1.x specific — confirm it
    # matches the installed TensorFlow version.
    tpu_cluster_resolver = (
        tf.contrib.cluster_resolver.python.training.TPUClusterResolver(
            tpu_names=[FLAGS.tpu_name],
            zone=FLAGS.tpu_zone,
            project=FLAGS.gcp_project))
    tpu_grpc_url = tpu_cluster_resolver.get_master()

  # RunConfig shared by train and eval; both point at the same master.
  config = tpu_config.RunConfig(
      master=tpu_grpc_url,
      evaluation_master=tpu_grpc_url,
      model_dir=FLAGS.model_dir,
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))
  params = {}
  # num_train_steps / num_eval_steps flags are also defined outside this file.
  estimator, train_input_fn, eval_input_fn, train_steps, eval_steps = (
      create_estimator(
          config,
          model_hparams.create_hparams(),
          FLAGS.pipeline_config_path,
          train_steps=FLAGS.num_train_steps,
          eval_steps=FLAGS.num_eval_steps,
          train_batch_size=FLAGS.train_batch_size,
          use_tpu=FLAGS.use_tpu,
          num_shards=FLAGS.num_shards,
          params=params))

  if FLAGS.mode in ['train', 'train_and_eval']:
    estimator.train(input_fn=train_input_fn, max_steps=train_steps)
  if FLAGS.mode == 'train_and_eval':
    # Eval one time.
    eval_results = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
    tf.logging.info('Eval results: %s' % eval_results)

  # Continuously evaluating.
  if FLAGS.mode == 'eval':

    def terminate_eval():
      # Timeout callback for checkpoints_iterator; returning True stops it.
      tf.logging.info('Terminating eval after %d seconds of no checkpoints' %
                      FLAGS.eval_timeout_secs)
      return True

    # Run evaluation when there's a new checkpoint.
    for ckpt in evaluation.checkpoints_iterator(
        FLAGS.model_dir,
        min_interval_secs=FLAGS.min_eval_interval_secs,
        timeout=FLAGS.eval_timeout_secs,
        timeout_fn=terminate_eval):
      tf.logging.info('Starting to evaluate.')
      try:
        eval_results = estimator.evaluate(
            input_fn=eval_input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        tf.logging.info('Eval results: %s' % eval_results)

        # Terminate eval job when final checkpoint is reached
        # Relies on checkpoint filenames of the form 'model.ckpt-<step>'.
        # NOTE(review): if train_steps is None this comparison is only valid
        # under Python 2 semantics — confirm before porting to Python 3.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= train_steps:
          tf.logging.info(
              'Evaluation finished after training step %d' % current_step)
          break
      except tf.errors.NotFoundError:
        # The checkpoint may be garbage-collected between discovery and eval.
        tf.logging.info(
            'Checkpoint %s no longer exists, skipping checkpoint' % ckpt)


if __name__ == '__main__':
  tf.app.run()
...@@ -15,6 +15,7 @@ py_library( ...@@ -15,6 +15,7 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow/models/research/object_detection/utils:ops",
], ],
) )
...@@ -36,6 +37,7 @@ py_library( ...@@ -36,6 +37,7 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow/models/research/object_detection/utils:test_case",
], ],
) )
...@@ -47,9 +49,10 @@ py_library( ...@@ -47,9 +49,10 @@ py_library(
deps = [ deps = [
":feature_map_generators", ":feature_map_generators",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops", "//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow_models/slim:inception_v2", "//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:inception_v2",
], ],
) )
...@@ -61,9 +64,10 @@ py_library( ...@@ -61,9 +64,10 @@ py_library(
deps = [ deps = [
":feature_map_generators", ":feature_map_generators",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops", "//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow_models/slim:inception_v3", "//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:inception_v3",
], ],
) )
...@@ -73,9 +77,10 @@ py_library( ...@@ -73,9 +77,10 @@ py_library(
deps = [ deps = [
":feature_map_generators", ":feature_map_generators",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops", "//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow_models/slim:mobilenet_v1", "//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:mobilenet_v1",
], ],
) )
...@@ -86,8 +91,40 @@ py_library( ...@@ -86,8 +91,40 @@ py_library(
":feature_map_generators", ":feature_map_generators",
":ssd_mobilenet_v1_feature_extractor", ":ssd_mobilenet_v1_feature_extractor",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/utils:ops", "//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow_models/slim:mobilenet_v1", "//third_party/tensorflow_models/slim:mobilenet_v1",
],
)
py_library(
name = "ssd_resnet_v1_fpn_feature_extractor",
srcs = ["ssd_resnet_v1_fpn_feature_extractor.py"],
deps = [
":feature_map_generators",
"//tensorflow",
"//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:resnet_v1",
],
)
py_library(
name = "ssd_resnet_v1_fpn_feature_extractor_testbase",
srcs = ["ssd_resnet_v1_fpn_feature_extractor_testbase.py"],
deps = [
"//tensorflow/models/research/object_detection/models:ssd_feature_extractor_test",
],
)
py_test(
name = "ssd_resnet_v1_fpn_feature_extractor_test",
timeout = "long",
srcs = ["ssd_resnet_v1_fpn_feature_extractor_test.py"],
deps = [
":ssd_resnet_v1_fpn_feature_extractor",
":ssd_resnet_v1_fpn_feature_extractor_testbase",
"//tensorflow",
], ],
) )
...@@ -153,8 +190,8 @@ py_library( ...@@ -153,8 +190,8 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:nasnet", "//third_party/tensorflow_models/slim:nasnet",
], ],
) )
...@@ -165,8 +202,8 @@ py_library( ...@@ -165,8 +202,8 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:inception_resnet_v2", "//third_party/tensorflow_models/slim:inception_resnet_v2",
], ],
) )
...@@ -188,8 +225,8 @@ py_library( ...@@ -188,8 +225,8 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:inception_v2", "//third_party/tensorflow_models/slim:inception_v2",
], ],
) )
...@@ -211,9 +248,9 @@ py_library( ...@@ -211,9 +248,9 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:resnet_utils", "//third_party/tensorflow_models/slim:resnet_utils",
"//tensorflow_models/slim:resnet_v1", "//third_party/tensorflow_models/slim:resnet_v1",
], ],
) )
......
...@@ -51,7 +51,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -51,7 +51,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True, batch_norm_trainable=True,
reuse_weights=None): reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models. """MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args: Args:
...@@ -66,6 +68,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -66,6 +68,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
(e.g. 1), it is desirable to disable batch norm update and use (e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params. pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
Raises: Raises:
ValueError: upon invalid `pad_to_multiple` values. ValueError: upon invalid `pad_to_multiple` values.
...@@ -76,7 +81,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -76,7 +81,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__( super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights) conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
...@@ -88,13 +94,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -88,13 +94,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
Returns: Returns:
feature_maps: a list of tensors where the ith tensor has shape feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i] [batch, height_i, width_i, depth_i]
Raises:
ValueError: if image height or width are not 256 pixels.
""" """
preprocessed_inputs.get_shape().assert_has_rank(4) image_shape = preprocessed_inputs.get_shape()
shape_assert = tf.Assert( image_shape.assert_has_rank(4)
tf.logical_and( image_height = image_shape[1].value
tf.equal(tf.shape(preprocessed_inputs)[1], 256), image_width = image_shape[2].value
tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
['image size must be 256 in both height and width.']) if image_height is None or image_width is None:
shape_assert = tf.Assert(
tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
['image size must be 256 in both height and width.'])
with tf.control_dependencies([shape_assert]):
preprocessed_inputs = tf.identity(preprocessed_inputs)
elif image_height != 256 or image_width != 256:
raise ValueError('image size must be = 256 in both height and width;'
' image dim = %d,%d' % (image_height, image_width))
feature_map_layout = { feature_map_layout = {
'from_layer': [ 'from_layer': [
...@@ -102,10 +120,12 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -102,10 +120,12 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
], ],
'layer_depth': [-1, -1, 512, 256, 256], 'layer_depth': [-1, -1, 512, 256, 256],
'conv_kernel_size': [-1, -1, 3, 3, 2], 'conv_kernel_size': [-1, -1, 3, 3, 2],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
} }
with tf.control_dependencies([shape_assert]): with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope([slim.batch_norm], fused=False):
with tf.variable_scope('MobilenetV1', with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope: reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base( _, image_features = mobilenet_v1.mobilenet_v1_base(
......
...@@ -22,7 +22,7 @@ from object_detection.models import ssd_feature_extractor_test ...@@ -22,7 +22,7 @@ from object_detection.models import ssd_feature_extractor_test
class EmbeddedSSDMobileNetV1FeatureExtractorTest( class EmbeddedSSDMobileNetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True, batch_norm_trainable=True):
...@@ -51,11 +51,23 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest( ...@@ -51,11 +51,23 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width = 256 image_width = 256
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024), expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024),
(4, 4, 4, 512), (4, 2, 2, 256), (2, 4, 4, 512), (2, 2, 2, 256),
(4, 1, 1, 256)] (2, 1, 1, 256)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024),
(2, 4, 4, 512), (2, 2, 2, 256),
(2, 1, 1, 256)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
...@@ -63,10 +75,10 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest( ...@@ -63,10 +75,10 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width = 256 image_width = 256
depth_multiplier = 0.5**12 depth_multiplier = 0.5**12
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 32), (4, 8, 8, 32), (4, 4, 4, 32), expected_feature_map_shape = [(2, 16, 16, 32), (2, 8, 8, 32), (2, 4, 4, 32),
(4, 2, 2, 32), (4, 1, 1, 32)] (2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1( def test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1(
...@@ -75,11 +87,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest( ...@@ -75,11 +87,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width = 256 image_width = 256
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024), expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024),
(4, 4, 4, 512), (4, 2, 2, 256), (2, 4, 4, 512), (2, 2, 2, 256),
(4, 1, 1, 256)] (2, 1, 1, 256)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_raises_error_with_pad_to_multiple_not_1(self): def test_extract_features_raises_error_with_pad_to_multiple_not_1(self):
......
...@@ -180,7 +180,7 @@ class FasterRCNNInceptionResnetV2FeatureExtractor( ...@@ -180,7 +180,7 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
InceptionResnetV2 checkpoints. InceptionResnetV2 checkpoints.
TODO: revisit whether it's possible to force the TODO(jonathanhuang,rathodv): revisit whether it's possible to force the
`Repeat` namescope as created in `_extract_box_classifier_features` to `Repeat` namescope as created in `_extract_box_classifier_features` to
start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can
be used. be used.
......
...@@ -111,7 +111,8 @@ class FasterRCNNResnetV1FeatureExtractor( ...@@ -111,7 +111,8 @@ class FasterRCNNResnetV1FeatureExtractor(
with tf.control_dependencies([shape_assert]): with tf.control_dependencies([shape_assert]):
# Disables batchnorm for fine-tuning with smaller batch sizes. # Disables batchnorm for fine-tuning with smaller batch sizes.
# TODO: Figure out if it is needed when image batch size is bigger. # TODO: Figure out if it is needed when image
# batch size is bigger.
with slim.arg_scope( with slim.arg_scope(
resnet_utils.resnet_arg_scope( resnet_utils.resnet_arg_scope(
batch_norm_epsilon=1e-5, batch_norm_epsilon=1e-5,
......
...@@ -25,6 +25,7 @@ of final feature maps. ...@@ -25,6 +25,7 @@ of final feature maps.
""" """
import collections import collections
import tensorflow as tf import tensorflow as tf
from object_detection.utils import ops
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -115,6 +116,9 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, ...@@ -115,6 +116,9 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
feature_map_keys = [] feature_map_keys = []
feature_maps = [] feature_maps = []
base_from_layer = '' base_from_layer = ''
use_explicit_padding = False
if 'use_explicit_padding' in feature_map_layout:
use_explicit_padding = feature_map_layout['use_explicit_padding']
use_depthwise = False use_depthwise = False
if 'use_depthwise' in feature_map_layout: if 'use_depthwise' in feature_map_layout:
use_depthwise = feature_map_layout['use_depthwise'] use_depthwise = feature_map_layout['use_depthwise']
...@@ -139,16 +143,21 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, ...@@ -139,16 +143,21 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
padding='SAME', padding='SAME',
stride=1, stride=1,
scope=layer_name) scope=layer_name)
stride = 2
layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format( layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
base_from_layer, index, conv_kernel_size, conv_kernel_size, base_from_layer, index, conv_kernel_size, conv_kernel_size,
depth_fn(layer_depth)) depth_fn(layer_depth))
stride = 2
padding = 'SAME'
if use_explicit_padding:
padding = 'VALID'
intermediate_layer = ops.fixed_padding(
intermediate_layer, conv_kernel_size)
if use_depthwise: if use_depthwise:
feature_map = slim.separable_conv2d( feature_map = slim.separable_conv2d(
intermediate_layer, intermediate_layer,
None, [conv_kernel_size, conv_kernel_size], None, [conv_kernel_size, conv_kernel_size],
depth_multiplier=1, depth_multiplier=1,
padding='SAME', padding=padding,
stride=stride, stride=stride,
scope=layer_name + '_depthwise') scope=layer_name + '_depthwise')
feature_map = slim.conv2d( feature_map = slim.conv2d(
...@@ -161,10 +170,56 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, ...@@ -161,10 +170,56 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
feature_map = slim.conv2d( feature_map = slim.conv2d(
intermediate_layer, intermediate_layer,
depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size], depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size],
padding='SAME', padding=padding,
stride=stride, stride=stride,
scope=layer_name) scope=layer_name)
feature_map_keys.append(layer_name) feature_map_keys.append(layer_name)
feature_maps.append(feature_map) feature_maps.append(feature_map)
return collections.OrderedDict( return collections.OrderedDict(
[(x, y) for (x, y) in zip(feature_map_keys, feature_maps)]) [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
def fpn_top_down_feature_maps(image_features, depth, scope=None):
"""Generates `top-down` feature maps for Feature Pyramid Networks.
See https://arxiv.org/abs/1612.03144 for details.
Args:
image_features: list of image feature tensors. Spatial resolutions of
succesive tensors must reduce exactly by a factor of 2.
depth: depth of output feature maps.
scope: A scope name to wrap this op under.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
"""
with tf.variable_scope(
scope, 'top_down', image_features):
num_levels = len(image_features)
output_feature_maps_list = []
output_feature_map_keys = []
with slim.arg_scope(
[slim.conv2d],
activation_fn=None, normalizer_fn=None, padding='SAME', stride=1):
top_down = slim.conv2d(
image_features[-1],
depth, [1, 1], scope='projection_%d' % num_levels)
output_feature_maps_list.append(top_down)
output_feature_map_keys.append(
'top_down_feature_map_%d' % (num_levels - 1))
for level in reversed(range(num_levels - 1)):
top_down = ops.nearest_neighbor_upsampling(top_down, 2)
residual = slim.conv2d(
image_features[level], depth, [1, 1],
scope='projection_%d' % (level + 1))
top_down = 0.5 * top_down + 0.5 * residual
output_feature_maps_list.append(slim.conv2d(
top_down,
depth, [3, 3],
activation_fn=None,
scope='smoothing_%d' % (level + 1)))
output_feature_map_keys.append('top_down_feature_map_%d' % level)
return collections.OrderedDict(
reversed(zip(output_feature_map_keys, output_feature_maps_list)))
...@@ -40,7 +40,7 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = { ...@@ -40,7 +40,7 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
} }
# TODO(rathodv): add tests with different anchor strides. # TODO: add tests with different anchor strides.
class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes_with_inception_v2(self): def test_get_expected_feature_map_shapes_with_inception_v2(self):
...@@ -134,6 +134,34 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): ...@@ -134,6 +134,34 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
class FPNFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes(self):
image_features = [
tf.random_uniform([4, 8, 8, 256], dtype=tf.float32),
tf.random_uniform([4, 4, 4, 256], dtype=tf.float32),
tf.random_uniform([4, 2, 2, 256], dtype=tf.float32),
tf.random_uniform([4, 1, 1, 256], dtype=tf.float32),
]
feature_maps = feature_map_generators.fpn_top_down_feature_maps(
image_features=image_features, depth=128)
expected_feature_map_shapes = {
'top_down_feature_map_0': (4, 8, 8, 128),
'top_down_feature_map_1': (4, 4, 4, 128),
'top_down_feature_map_2': (4, 2, 2, 128),
'top_down_feature_map_3': (4, 1, 1, 128)
}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = {key: value.shape
for key, value in out_feature_maps.items()}
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
class GetDepthFunctionTest(tf.test.TestCase): class GetDepthFunctionTest(tf.test.TestCase):
def test_return_min_depth_when_multiplier_is_small(self): def test_return_min_depth_when_multiplier_is_small(self):
......
...@@ -17,33 +17,14 @@ ...@@ -17,33 +17,14 @@
from abc import abstractmethod from abc import abstractmethod
import itertools
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from object_detection.utils import test_case
class SsdFeatureExtractorTestBase(object):
def _validate_features_shape(self, class SsdFeatureExtractorTestBase(test_case.TestCase):
feature_extractor,
preprocessed_inputs,
expected_feature_map_shapes):
"""Checks the extracted features are of correct shape.
Args:
feature_extractor: The feature extractor to test.
preprocessed_inputs: A [batch, height, width, 3] tensor to extract
features with.
expected_feature_map_shapes: The expected shape of the extracted features.
"""
feature_maps = feature_extractor.extract_features(preprocessed_inputs)
feature_map_shapes = [tf.shape(feature_map) for feature_map in feature_maps]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
feature_map_shapes_out = sess.run(feature_map_shapes)
for shape_out, exp_shape_out in zip(
feature_map_shapes_out, expected_feature_map_shapes):
self.assertAllEqual(shape_out, exp_shape_out)
@abstractmethod @abstractmethod
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple): def _create_feature_extractor(self, depth_multiplier, pad_to_multiple):
...@@ -59,14 +40,39 @@ class SsdFeatureExtractorTestBase(object): ...@@ -59,14 +40,39 @@ class SsdFeatureExtractorTestBase(object):
pass pass
def check_extract_features_returns_correct_shape( def check_extract_features_returns_correct_shape(
self, image_height, image_width, depth_multiplier, pad_to_multiple, self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes_out): pad_to_multiple, expected_feature_map_shapes):
feature_extractor = self._create_feature_extractor(depth_multiplier, def graph_fn(image_tensor):
pad_to_multiple) feature_extractor = self._create_feature_extractor(depth_multiplier,
preprocessed_inputs = tf.random_uniform( pad_to_multiple)
[4, image_height, image_width, 3], dtype=tf.float32) feature_maps = feature_extractor.extract_features(image_tensor)
self._validate_features_shape( return feature_maps
feature_extractor, preprocessed_inputs, expected_feature_map_shapes_out)
image_tensor = np.random.rand(batch_size, image_height, image_width,
3).astype(np.float32)
feature_maps = self.execute(graph_fn, [image_tensor])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def check_extract_features_returns_correct_shapes_with_dynamic_inputs(
self, batch_size, image_height, image_width, depth_multiplier,
pad_to_multiple, expected_feature_map_shapes):
def graph_fn(image_height, image_width):
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
image_tensor = tf.random_uniform([batch_size, image_height, image_width,
3], dtype=tf.float32)
feature_maps = feature_extractor.extract_features(image_tensor)
return feature_maps
feature_maps = self.execute_cpu(graph_fn, [
np.array(image_height, dtype=np.int32),
np.array(image_width, dtype=np.int32)
])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def check_extract_features_raises_error_with_invalid_image_size( def check_extract_features_raises_error_with_invalid_image_size(
self, image_height, image_width, depth_multiplier, pad_to_multiple): self, image_height, image_width, depth_multiplier, pad_to_multiple):
......
...@@ -19,6 +19,7 @@ import tensorflow as tf ...@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import inception_v2 from nets import inception_v2
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -34,7 +35,9 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -34,7 +35,9 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True, batch_norm_trainable=True,
reuse_weights=None): reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""InceptionV2 Feature Extractor for SSD Models. """InceptionV2 Feature Extractor for SSD Models.
Args: Args:
...@@ -49,10 +52,14 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -49,10 +52,14 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use (e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params. pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
""" """
super(SSDInceptionV2FeatureExtractor, self).__init__( super(SSDInceptionV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights) conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -80,32 +87,30 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -80,32 +87,30 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i] [batch, height_i, width_i, depth_i]
""" """
preprocessed_inputs.get_shape().assert_has_rank(4) preprocessed_inputs = shape_utils.check_min_image_dim(
shape_assert = tf.Assert( 33, preprocessed_inputs)
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.'])
feature_map_layout = { feature_map_layout = {
'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''], 'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128], 'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
} }
with tf.control_dependencies([shape_assert]): with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams): with tf.variable_scope('InceptionV2',
with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
reuse=self._reuse_weights) as scope: _, image_features = inception_v2.inception_v2_base(
_, image_features = inception_v2.inception_v2_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Mixed_5c',
final_endpoint='Mixed_5c', min_depth=self._min_depth,
min_depth=self._min_depth, depth_multiplier=self._depth_multiplier,
depth_multiplier=self._depth_multiplier, scope=scope)
scope=scope) feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout,
feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier,
depth_multiplier=self._depth_multiplier, min_depth=self._min_depth,
min_depth=self._min_depth, insert_1x1_conv=True,
insert_1x1_conv=True, image_features=image_features)
image_features=image_features)
return feature_maps.values() return feature_maps.values()
...@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v2_feature_extractor ...@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v2_feature_extractor
class SsdInceptionV2FeatureExtractorTest( class SsdInceptionV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True, batch_norm_trainable=True):
...@@ -49,11 +49,23 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -49,11 +49,23 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 128 image_width = 128
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 8, 8, 576), (4, 4, 4, 1024), expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256), (2, 2, 2, 512), (2, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)] (2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1024),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self): def test_extract_features_returns_correct_shapes_299(self):
...@@ -61,11 +73,11 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -61,11 +73,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 576), (4, 10, 10, 1024), expected_feature_map_shape = [(2, 19, 19, 576), (2, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256), (2, 5, 5, 512), (2, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
...@@ -73,11 +85,11 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -73,11 +85,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 0.5**12 depth_multiplier = 0.5**12
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 128), (4, 10, 10, 128), expected_feature_map_shape = [(2, 19, 19, 128), (2, 10, 10, 128),
(4, 5, 5, 32), (4, 3, 3, 32), (2, 5, 5, 32), (2, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)] (2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
...@@ -85,11 +97,11 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -85,11 +97,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 32 pad_to_multiple = 32
expected_feature_map_shape = [(4, 20, 20, 576), (4, 10, 10, 1024), expected_feature_map_shape = [(2, 20, 20, 576), (2, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256), (2, 5, 5, 512), (2, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self): def test_extract_features_raises_error_with_invalid_image_size(self):
......
...@@ -19,6 +19,7 @@ import tensorflow as tf ...@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import inception_v3 from nets import inception_v3
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -34,7 +35,9 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -34,7 +35,9 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True, batch_norm_trainable=True,
reuse_weights=None): reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""InceptionV3 Feature Extractor for SSD Models. """InceptionV3 Feature Extractor for SSD Models.
Args: Args:
...@@ -49,10 +52,14 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -49,10 +52,14 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use (e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params. pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
""" """
super(SSDInceptionV3FeatureExtractor, self).__init__( super(SSDInceptionV3FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights) conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -80,32 +87,29 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -80,32 +87,29 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i] [batch, height_i, width_i, depth_i]
""" """
preprocessed_inputs.get_shape().assert_has_rank(4) preprocessed_inputs = shape_utils.check_min_image_dim(
shape_assert = tf.Assert( 33, preprocessed_inputs)
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.'])
feature_map_layout = { feature_map_layout = {
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''], 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128], 'layer_depth': [-1, -1, -1, 512, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
} }
with tf.control_dependencies([shape_assert]): with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams): with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope:
with tf.variable_scope('InceptionV3', _, image_features = inception_v3.inception_v3_base(
reuse=self._reuse_weights) as scope: ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
_, image_features = inception_v3.inception_v3_base( final_endpoint='Mixed_7c',
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), min_depth=self._min_depth,
final_endpoint='Mixed_7c', depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth, scope=scope)
depth_multiplier=self._depth_multiplier, feature_maps = feature_map_generators.multi_resolution_feature_maps(
scope=scope) feature_map_layout=feature_map_layout,
feature_maps = feature_map_generators.multi_resolution_feature_maps( depth_multiplier=self._depth_multiplier,
feature_map_layout=feature_map_layout, min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier, insert_1x1_conv=True,
min_depth=self._min_depth, image_features=image_features)
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values() return feature_maps.values()
...@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v3_feature_extractor ...@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v3_feature_extractor
class SsdInceptionV3FeatureExtractorTest( class SsdInceptionV3FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True, batch_norm_trainable=True):
...@@ -49,11 +49,23 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -49,11 +49,23 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 128 image_width = 128
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 13, 13, 288), (4, 6, 6, 768), expected_feature_map_shape = [(2, 13, 13, 288), (2, 6, 6, 768),
(4, 2, 2, 2048), (4, 1, 1, 512), (2, 2, 2, 2048), (2, 1, 1, 512),
(4, 1, 1, 256), (4, 1, 1, 128)] (2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 13, 13, 288), (2, 6, 6, 768),
(2, 2, 2, 2048), (2, 1, 1, 512),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self): def test_extract_features_returns_correct_shapes_299(self):
...@@ -61,11 +73,11 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -61,11 +73,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 35, 35, 288), (4, 17, 17, 768), expected_feature_map_shape = [(2, 35, 35, 288), (2, 17, 17, 768),
(4, 8, 8, 2048), (4, 4, 4, 512), (2, 8, 8, 2048), (2, 4, 4, 512),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
...@@ -73,11 +85,11 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -73,11 +85,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 0.5**12 depth_multiplier = 0.5**12
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 35, 35, 128), (4, 17, 17, 128), expected_feature_map_shape = [(2, 35, 35, 128), (2, 17, 17, 128),
(4, 8, 8, 192), (4, 4, 4, 32), (2, 8, 8, 192), (2, 4, 4, 32),
(4, 2, 2, 32), (4, 1, 1, 32)] (2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
...@@ -85,11 +97,11 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -85,11 +97,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 32 pad_to_multiple = 32
expected_feature_map_shape = [(4, 37, 37, 288), (4, 18, 18, 768), expected_feature_map_shape = [(2, 37, 37, 288), (2, 18, 18, 768),
(4, 8, 8, 2048), (4, 4, 4, 512), (2, 8, 8, 2048), (2, 4, 4, 512),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self): def test_extract_features_raises_error_with_invalid_image_size(self):
......
...@@ -20,6 +20,7 @@ import tensorflow as tf ...@@ -20,6 +20,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import mobilenet_v1 from nets import mobilenet_v1
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -35,7 +36,9 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -35,7 +36,9 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True, batch_norm_trainable=True,
reuse_weights=None): reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""MobileNetV1 Feature Extractor for SSD Models. """MobileNetV1 Feature Extractor for SSD Models.
Args: Args:
...@@ -50,10 +53,14 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -50,10 +53,14 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use (e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params. pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
""" """
super(SSDMobileNetV1FeatureExtractor, self).__init__( super(SSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights) conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -81,34 +88,33 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -81,34 +88,33 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i] [batch, height_i, width_i, depth_i]
""" """
preprocessed_inputs.get_shape().assert_has_rank(4) preprocessed_inputs = shape_utils.check_min_image_dim(
shape_assert = tf.Assert( 33, preprocessed_inputs)
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.'])
feature_map_layout = { feature_map_layout = {
'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
'', ''], '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128], 'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
} }
with tf.control_dependencies([shape_assert]): with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams): # TODO: Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False): with slim.arg_scope([slim.batch_norm], fused=False):
with tf.variable_scope('MobilenetV1', with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope: reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base( _, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise', final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth, min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
scope=scope) scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout, feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth, min_depth=self._min_depth,
insert_1x1_conv=True, insert_1x1_conv=True,
image_features=image_features) image_features=image_features)
return feature_maps.values() return feature_maps.values()
...@@ -24,7 +24,7 @@ slim = tf.contrib.slim ...@@ -24,7 +24,7 @@ slim = tf.contrib.slim
class SsdMobilenetV1FeatureExtractorTest( class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True, batch_norm_trainable=True):
...@@ -52,11 +52,11 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -52,11 +52,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 128 image_width = 128
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 8, 8, 512), (4, 4, 4, 1024), expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256), (2, 2, 2, 512), (2, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)] (2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self): def test_extract_features_returns_correct_shapes_299(self):
...@@ -64,11 +64,23 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -64,11 +64,23 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 512), (4, 10, 10, 1024), expected_feature_map_shape = [(2, 19, 19, 512), (2, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256), (2, 5, 5, 512), (2, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_with_dynamic_image_shape(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
...@@ -76,11 +88,11 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -76,11 +88,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 0.5**12 depth_multiplier = 0.5**12
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 32), (4, 10, 10, 32), expected_feature_map_shape = [(2, 19, 19, 32), (2, 10, 10, 32),
(4, 5, 5, 32), (4, 3, 3, 32), (2, 5, 5, 32), (2, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)] (2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
...@@ -88,11 +100,11 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -88,11 +100,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 32 pad_to_multiple = 32
expected_feature_map_shape = [(4, 20, 20, 512), (4, 10, 10, 1024), expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256), (2, 5, 5, 512), (2, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self): def test_extract_features_raises_error_with_invalid_image_size(self):
...@@ -108,7 +120,7 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -108,7 +120,7 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 128 image_width = 128
depth_multiplier = 1 depth_multiplier = 1
pad_to_multiple = 1 pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3) test_image = np.random.rand(2, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier, feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple) pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image) preprocessed_image = feature_extractor.preprocess(test_image)
......
"""SSD Feature Pyramid Network (FPN) feature extractors based on Resnet v1.
See https://arxiv.org/abs/1708.02002 for details.
"""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import resnet_v1
slim = tf.contrib.slim
class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD FPN feature extractor based on Resnet v1 architecture."""
def __init__(self,
             is_training,
             depth_multiplier,
             min_depth,
             pad_to_multiple,
             conv_hyperparams,
             resnet_base_fn,
             resnet_scope_name,
             fpn_scope_name,
             batch_norm_trainable=True,
             reuse_weights=None,
             use_explicit_padding=False,
             use_depthwise=False):
  """SSD FPN feature extractor based on Resnet v1 architecture.

  Args:
    is_training: whether the network is in training mode.
    depth_multiplier: float depth multiplier for feature extractor.
      UNUSED currently; only 1.0 is accepted.
    min_depth: minimum feature extractor depth. UNUSED currently.
    pad_to_multiple: the nearest multiple to zero pad the input height and
      width dimensions to.
    conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
    resnet_base_fn: base resnet network to use.
    resnet_scope_name: scope name under which to construct resnet.
    fpn_scope_name: scope name under which to construct the feature pyramid
      network.
    batch_norm_trainable: Whether to update batch norm parameters during
      training or not. When training with a small batch size
      (e.g. 1), it is desirable to disable batch norm update and use
      pretrained batch norm params.
    reuse_weights: Whether to reuse variables. Default is None.
    use_explicit_padding: Whether to use explicit padding when extracting
      features. Default is False. UNUSED currently; must remain False.
    use_depthwise: Whether to use depthwise convolutions. UNUSED currently.

  Raises:
    ValueError: On supplying invalid arguments for unused arguments.
  """
  # Forward use_depthwise to the base class as well, so the stored flag
  # reflects the caller's argument — consistent with the other SSD feature
  # extractors (InceptionV2/V3, MobileNetV1) in this package.
  super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
      is_training, depth_multiplier, min_depth, pad_to_multiple,
      conv_hyperparams, batch_norm_trainable, reuse_weights,
      use_explicit_padding, use_depthwise)
  if self._depth_multiplier != 1.0:
    raise ValueError('Only depth 1.0 is supported, found: {}'.
                     format(self._depth_multiplier))
  # Truthiness check instead of `is True`: any truthy value is invalid,
  # since explicit padding is unsupported by this extractor.
  if self._use_explicit_padding:
    raise ValueError('Explicit padding is not a valid option.')
  self._resnet_base_fn = resnet_base_fn
  self._resnet_scope_name = resnet_scope_name
  self._fpn_scope_name = fpn_scope_name
def preprocess(self, resized_inputs):
"""SSD preprocessing.
VGG style channel mean subtraction as described here:
https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-mdnge.
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
def _filter_features(self, image_features):
# TODO: Change resnet endpoint to strip scope prefixes instead
# of munging the scope here.
filtered_image_features = dict({})
for key, feature in image_features.items():
feature_name = key.split('/')[-1]
if feature_name in ['block2', 'block3', 'block4']:
filtered_image_features[feature_name] = feature
return filtered_image_features
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Runs the resnet base network, keeps its block2/block3/block4 endpoints,
    appends two extra stride-2 conv layers on top of block4, and fuses all
    five maps with a top-down feature pyramid network.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    # Reject inputs smaller than 129x129; presumably the minimum size for
    # which all resnet/FPN strides produce valid feature maps — TODO confirm.
    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        # num_classes=None and global_pool=False return the raw endpoint
        # dict instead of classification logits.
        _, image_features = self._resnet_base_fn(
            inputs=ops.pad_to_multiple(preprocessed_inputs,
                                       self._pad_to_multiple),
            num_classes=None,
            is_training=self._is_training and self._batch_norm_trainable,
            global_pool=False,
            output_stride=None,
            store_non_strided_activations=True,
            scope=scope)
      # Strip scope prefixes and drop endpoints the FPN does not use.
      image_features = self._filter_features(image_features)
      last_feature_map = image_features['block4']
    with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
      with slim.arg_scope(self._conv_hyperparams):
        # Two additional coarse "bottom-up" levels (block5, block6 scopes),
        # each halving spatial resolution with a stride-2 3x3 conv.
        for i in range(5, 7):
          last_feature_map = slim.conv2d(
              last_feature_map,
              num_outputs=256,
              kernel_size=[3, 3],
              stride=2,
              padding='SAME',
              scope='block{}'.format(i))
          image_features['bottomup_{}'.format(i)] = last_feature_map
        # Fuse the five levels (finest to coarsest) with top-down FPN
        # connections; every output map has depth 256.
        feature_maps = feature_map_generators.fpn_top_down_feature_maps(
            [
                image_features[key] for key in
                ['block2', 'block3', 'block4', 'bottomup_5', 'bottomup_6']
            ],
            depth=256,
            scope='top_down_features')
    return feature_maps.values()
class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
  """SSD FPN feature extractor using a Resnet-50 v1 base network."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False):
    """Resnet50 v1 FPN Feature Extractor for SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      batch_norm_trainable: Whether to update batch norm parameters during
        training or not. When training with a small batch size
        (e.g. 1), it is desirable to disable batch norm update and use
        pretrained batch norm params.
      reuse_weights: Whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
    """
    # Bind the generic FPN extractor to the Resnet-50 v1 base network and
    # its canonical variable scope names.
    super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        resnet_base_fn=resnet_v1.resnet_v1_50,
        resnet_scope_name='resnet_v1_50',
        fpn_scope_name='fpn',
        batch_norm_trainable=batch_norm_trainable,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding)
class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
  """SSD FPN feature extractor using a Resnet-101 v1 base network."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False):
    """Resnet101 v1 FPN Feature Extractor for SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      batch_norm_trainable: Whether to update batch norm parameters during
        training or not. When training with a small batch size
        (e.g. 1), it is desirable to disable batch norm update and use
        pretrained batch norm params.
      reuse_weights: Whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
    """
    # Bind the generic FPN extractor to the Resnet-101 v1 base network and
    # its canonical variable scope names.
    super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        resnet_base_fn=resnet_v1.resnet_v1_101,
        resnet_scope_name='resnet_v1_101',
        fpn_scope_name='fpn',
        batch_norm_trainable=batch_norm_trainable,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding)
class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
  """SSD FPN feature extractor using a Resnet-152 v1 base network."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False):
    """Resnet152 v1 FPN Feature Extractor for SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      batch_norm_trainable: Whether to update batch norm parameters during
        training or not. When training with a small batch size
        (e.g. 1), it is desirable to disable batch norm update and use
        pretrained batch norm params.
      reuse_weights: Whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
    """
    # Bind the generic FPN extractor to the Resnet-152 v1 base network and
    # its canonical variable scope names.
    super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        resnet_base_fn=resnet_v1.resnet_v1_152,
        resnet_scope_name='resnet_v1_152',
        fpn_scope_name='fpn',
        batch_norm_trainable=batch_norm_trainable,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment