Unverified Commit 31ae57eb authored by pkulzc, committed by GitHub

Minor fixes for object detection (#5613)

* Internal change.

PiperOrigin-RevId: 213914693

* Add an original_image_spatial_shape tensor to the input dictionary to store the shape of the original input image

PiperOrigin-RevId: 214018767

* Remove "groundtruth_confidences" from decoders use "groundtruth_weights" to indicate label confidence.

This also fixes a bug that only surfaced now: the random crop routines in core/preprocessor.py did not correctly handle the "groundtruth_weights" tensors returned by the decoders.

PiperOrigin-RevId: 214091843

* Update CocoMaskEvaluator to allow for a batch of image info, rather than a single image.

PiperOrigin-RevId: 214295305

* Add an option to summarize gradients.

PiperOrigin-RevId: 214310875

* Adds FasterRCNN inference on CPU

1. Adds a flag use_static_shapes_for_eval to restrict to ops that guarantee static shapes.
2. Skips filtering of overlapping anchors while clipping the anchors when use_static_shapes_for_eval is set to True.
3. A...
parent 0b0c9cfd
......@@ -19,7 +19,6 @@ models.
"""
from abc import abstractmethod
import re
import tensorflow as tf
from object_detection.core import box_list
......@@ -116,6 +115,25 @@ class SSDFeatureExtractor(object):
"""
raise NotImplementedError
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in tf.global_variables():
var_name = variable.op.name
if var_name.startswith(feature_extractor_scope + '/'):
var_name = var_name.replace(feature_extractor_scope + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
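
# Illustrative sketch (not part of this diff): the scope-stripping logic
# above in standalone form, and how the resulting map is typically handed
# to a Saver. The variable name and checkpoint path are hypothetical.
import tensorflow as tf

with tf.variable_scope('FeatureExtractor'):
  tf.get_variable('conv/weights', shape=[3, 3, 3, 8])

variables_to_restore = {}
for variable in tf.global_variables():
  var_name = variable.op.name
  if var_name.startswith('FeatureExtractor/'):
    variables_to_restore[var_name.replace('FeatureExtractor/', '')] = variable

# A Saver built from this map reads 'conv/weights' from a classification
# checkpoint and assigns it to 'FeatureExtractor/conv/weights' here:
# saver = tf.train.Saver(var_list=variables_to_restore)
# saver.restore(sess, '/path/to/classification/model.ckpt')
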
class SSDKerasFeatureExtractor(tf.keras.Model):
"""SSD Feature Extractor definition."""
......@@ -218,6 +236,25 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
def call(self, inputs, **kwargs):
return self._extract_features(inputs)
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in tf.global_variables():
var_name = variable.op.name
if var_name.startswith(feature_extractor_scope + '/'):
var_name = var_name.replace(feature_extractor_scope + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
class SSDMetaArch(model.DetectionModel):
"""SSD Meta-architecture definition."""
......@@ -333,13 +370,15 @@ class SSDMetaArch(model.DetectionModel):
# Slim feature extractors get an explicit naming scope
self._extract_features_scope = 'FeatureExtractor'
# TODO(jonathanhuang): handle agnostic mode
# weights
self._unmatched_class_label = tf.constant([1] + self.num_classes * [0],
tf.float32)
if encode_background_as_zeros:
if self._add_background_class and encode_background_as_zeros:
self._unmatched_class_label = tf.constant((self.num_classes + 1) * [0],
tf.float32)
elif self._add_background_class:
self._unmatched_class_label = tf.constant([1] + self.num_classes * [0],
tf.float32)
else:
self._unmatched_class_label = tf.constant(self.num_classes * [0],
tf.float32)
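# Sketch (not part of this diff): the three unmatched-label layouts above,
# shown for num_classes = 2:
#   add_background_class and encode_background_as_zeros -> [0., 0., 0.]
#   add_background_class only (explicit background slot) -> [1., 0., 0.]
#   no background class                                  -> [0., 0.]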
self._target_assigner = target_assigner_instance
......@@ -606,14 +645,22 @@ class SSDMetaArch(model.DetectionModel):
detection_boxes = tf.identity(detection_boxes, 'raw_box_locations')
detection_boxes = tf.expand_dims(detection_boxes, axis=2)
detection_scores_with_background = self._score_conversion_fn(
class_predictions)
detection_scores_with_background = tf.identity(
detection_scores_with_background, 'raw_box_scores')
detection_scores = tf.slice(detection_scores_with_background, [0, 0, 1],
[-1, -1, -1])
detection_scores = self._score_conversion_fn(class_predictions)
detection_scores = tf.identity(detection_scores, 'raw_box_scores')
if self._add_background_class:
detection_scores = tf.slice(detection_scores, [0, 0, 1], [-1, -1, -1])
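# Sketch (not part of this diff): for scores [[[0.7, 0.2, 0.1]]] with shape
# [batch=1, anchors=1, background + 2 classes], the slice above drops the
# background column and yields [[[0.2, 0.1]]].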
additional_fields = None
batch_size = (
shape_utils.combined_static_and_dynamic_shape(preprocessed_images)[0])
if 'feature_maps' in prediction_dict:
feature_map_list = []
for feature_map in prediction_dict['feature_maps']:
feature_map_list.append(tf.reshape(feature_map, [batch_size, -1]))
box_features = tf.concat(feature_map_list, 1)
box_features = tf.identity(box_features, 'raw_box_features')
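# Sketch (not part of this diff): with two toy feature maps of shapes
# [2, 4, 4, 8] and [2, 2, 2, 16], each is reshaped to [2, 128] and [2, 64]
# respectively, and the concat produces box_features of shape [2, 192].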
if detection_keypoints is not None:
additional_fields = {
fields.BoxListFields.keypoints: detection_keypoints}
......@@ -683,17 +730,20 @@ class SSDMetaArch(model.DetectionModel):
self.groundtruth_lists(fields.BoxListFields.boxes), match_list)
if self._random_example_sampler:
batch_cls_per_anchor_weights = tf.reduce_mean(
batch_cls_weights, axis=-1)
batch_sampled_indicator = tf.to_float(
shape_utils.static_or_dynamic_map_fn(
self._minibatch_subsample_fn,
[batch_cls_targets, batch_cls_weights],
[batch_cls_targets, batch_cls_per_anchor_weights],
dtype=tf.bool,
parallel_iterations=self._parallel_iterations,
back_prop=True))
batch_reg_weights = tf.multiply(batch_sampled_indicator,
batch_reg_weights)
batch_cls_weights = tf.multiply(batch_sampled_indicator,
batch_cls_weights)
batch_cls_weights = tf.multiply(
tf.expand_dims(batch_sampled_indicator, -1),
batch_cls_weights)
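# Sketch (not part of this diff): batch_cls_weights is now per class, with
# shape [batch, num_anchors, num_classes], while the sampled indicator is
# [batch, num_anchors]; expand_dims adds a trailing axis so the indicator
# broadcasts across the class dimension. E.g. indicator [[1., 0.]] times
# tf.ones([1, 2, 3]) keeps anchor 0's weights and zeros anchor 1 for every
# class.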
losses_mask = None
if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
......@@ -713,16 +763,32 @@ class SSDMetaArch(model.DetectionModel):
losses_mask=losses_mask)
if self._expected_classification_loss_under_sampling:
# Need to compute losses for assigned targets against the
# unmatched_class_label as well as their assigned targets.
# The simplest approach (though wasteful) is to compute all losses twice.
batch_size, num_anchors, num_classes = batch_cls_targets.get_shape()
unmatched_targets = tf.ones([batch_size, num_anchors, 1
]) * self._unmatched_class_label
unmatched_cls_losses = self._classification_loss(
prediction_dict['class_predictions_with_background'],
unmatched_targets,
weights=batch_cls_weights,
losses_mask=losses_mask)
if cls_losses.get_shape().ndims == 3:
batch_size, num_anchors, num_classes = cls_losses.get_shape()
cls_losses = tf.reshape(cls_losses, [batch_size, -1])
unmatched_cls_losses = tf.reshape(unmatched_cls_losses,
[batch_size, -1])
batch_cls_targets = tf.reshape(
batch_cls_targets, [batch_size, num_anchors * num_classes, -1])
batch_cls_targets = tf.concat(
[1 - batch_cls_targets, batch_cls_targets], axis=-1)
cls_losses = self._expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses)
batch_cls_targets, cls_losses, unmatched_cls_losses)
classification_loss = tf.reduce_sum(cls_losses)
localization_loss = tf.reduce_sum(location_losses)
......@@ -971,6 +1037,26 @@ class SSDMetaArch(model.DetectionModel):
[combined_shape[0], combined_shape[1], 4]))
return decoded_boxes, decoded_keypoints
def regularization_losses(self):
"""Returns a list of regularization losses for this model.
Returns a list of regularization losses for this model that the estimator
needs to use during training/optimization.
Returns:
A list of regularization loss tensors.
"""
losses = []
slim_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
# Copy the slim losses to avoid modifying the collection
if slim_losses:
losses.extend(slim_losses)
if self._box_predictor.is_keras_model:
losses.extend(self._box_predictor.losses)
if self._feature_extractor.is_keras_model:
losses.extend(self._feature_extractor.losses)
return losses
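
# Illustrative sketch (not part of this diff) of how the estimator side is
# expected to fold these losses into the total loss; the tensors here are
# stand-ins (the real call site appears in the model_lib diff below):
import tensorflow as tf

regularization_losses = [tf.constant(0.1), tf.constant(0.2)]  # stand-ins
if regularization_losses:
  regularization_loss = tf.add_n(regularization_losses,
                                 name='regularization_loss')
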
def restore_map(self,
fine_tune_checkpoint_type='detection',
load_all_detection_checkpoint_vars=False):
......@@ -997,18 +1083,44 @@ class SSDMetaArch(model.DetectionModel):
if fine_tune_checkpoint_type not in ['detection', 'classification']:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
variables_to_restore = {}
for variable in tf.global_variables():
var_name = variable.op.name
if (fine_tune_checkpoint_type == 'detection' and
load_all_detection_checkpoint_vars):
variables_to_restore[var_name] = variable
else:
if var_name.startswith(self._extract_features_scope):
if fine_tune_checkpoint_type == 'classification':
var_name = (
re.split('^' + self._extract_features_scope + '/',
var_name)[-1])
if fine_tune_checkpoint_type == 'classification':
return self._feature_extractor.restore_from_classification_checkpoint_fn(
self._extract_features_scope)
if fine_tune_checkpoint_type == 'detection':
variables_to_restore = {}
for variable in tf.global_variables():
var_name = variable.op.name
if load_all_detection_checkpoint_vars:
variables_to_restore[var_name] = variable
else:
if var_name.startswith(self._extract_features_scope):
variables_to_restore[var_name] = variable
return variables_to_restore
def updates(self):
"""Returns a list of update operators for this model.
Returns a list of update operators for this model that must be executed at
each training step. The estimator's train op needs to have a control
dependency on these updates.
Returns:
A list of update operators.
"""
update_ops = []
slim_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# Copy the slim ops to avoid modifying the collection
if slim_update_ops:
update_ops.extend(slim_update_ops)
if self._box_predictor.is_keras_model:
update_ops.extend(self._box_predictor.get_updates_for(None))
update_ops.extend(self._box_predictor.get_updates_for(
self._box_predictor.inputs))
if self._feature_extractor.is_keras_model:
update_ops.extend(self._feature_extractor.get_updates_for(None))
update_ops.extend(self._feature_extractor.get_updates_for(
self._feature_extractor.inputs))
return update_ops
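
# Illustrative sketch (not part of this diff): giving a train op a control
# dependency on these updates, as the estimator's train op is expected to.
# The loss and update ops are stand-ins.
import tensorflow as tf

loss = tf.constant(1.0)     # stand-in for the total loss
update_ops = [tf.no_op()]   # stand-in for detection_model.updates()
with tf.control_dependencies(update_ops):
  train_op = tf.identity(loss, name='train_op')
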
......@@ -42,7 +42,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
random_example_sampling=False,
weight_regression_loss_by_score=False,
use_expected_classification_loss_under_sampling=False,
minimum_negative_sampling=1,
min_num_negative_samples=1,
desired_negative_sampling_ratio=3,
use_keras=False,
predict_mask=False,
......@@ -57,7 +57,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
weight_regression_loss_by_score=weight_regression_loss_by_score,
use_expected_classification_loss_under_sampling=
use_expected_classification_loss_under_sampling,
minimum_negative_sampling=minimum_negative_sampling,
min_num_negative_samples=min_num_negative_samples,
desired_negative_sampling_ratio=desired_negative_sampling_ratio,
use_keras=use_keras,
predict_mask=predict_mask,
......@@ -344,11 +344,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32)
groundtruth_classes2 = np.array([[0, 1]], dtype=np.float32)
groundtruth_classes1 = np.array([[1]], dtype=np.float32)
groundtruth_classes2 = np.array([[1]], dtype=np.float32)
expected_localization_loss = 0.0
expected_classification_loss = (
batch_size * num_anchors * (num_classes + 1) * np.log(2.0))
batch_size * num_anchors * num_classes * np.log(2.0))
(localization_loss, classification_loss) = self.execute(
graph_fn, [
preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
......@@ -371,7 +371,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
apply_hard_mining=False,
add_background_class=True,
use_expected_classification_loss_under_sampling=True,
minimum_negative_sampling=1,
min_num_negative_samples=1,
desired_negative_sampling_ratio=desired_negative_sampling_ratio)
model.provide_groundtruth(groundtruth_boxes_list,
groundtruth_classes_list)
......@@ -391,8 +391,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
expected_localization_loss = 0.0
expected_classification_loss = (
batch_size * (desired_negative_sampling_ratio * num_anchors +
num_classes * num_anchors) * np.log(2.0))
batch_size * (num_anchors + num_classes * num_anchors) * np.log(2.0))
(localization_loss, classification_loss) = self.execute(
graph_fn, [
preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
......@@ -432,11 +431,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
groundtruth_boxes1 = np.array([[0, 0, 1, 1]], dtype=np.float32)
groundtruth_boxes2 = np.array([[0, 0, 1, 1]], dtype=np.float32)
groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32)
groundtruth_classes2 = np.array([[1, 0]], dtype=np.float32)
groundtruth_classes1 = np.array([[1]], dtype=np.float32)
groundtruth_classes2 = np.array([[0]], dtype=np.float32)
expected_localization_loss = 0.25
expected_classification_loss = (
batch_size * num_anchors * (num_classes + 1) * np.log(2.0))
batch_size * num_anchors * num_classes * np.log(2.0))
(localization_loss, classification_loss) = self.execute(
graph_fn, [
preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
......
......@@ -119,7 +119,7 @@ class SSDMetaArchTestBase(test_case.TestCase):
random_example_sampling=False,
weight_regression_loss_by_score=False,
use_expected_classification_loss_under_sampling=False,
minimum_negative_sampling=1,
min_num_negative_samples=1,
desired_negative_sampling_ratio=3,
use_keras=False,
predict_mask=False,
......@@ -130,10 +130,12 @@ class SSDMetaArchTestBase(test_case.TestCase):
mock_anchor_generator = MockAnchorGenerator2x2()
if use_keras:
mock_box_predictor = test_utils.MockKerasBoxPredictor(
is_training, num_classes, predict_mask=predict_mask)
is_training, num_classes, add_background_class=add_background_class,
predict_mask=predict_mask)
else:
mock_box_predictor = test_utils.MockBoxPredictor(
is_training, num_classes, predict_mask=predict_mask)
is_training, num_classes, add_background_class=add_background_class,
predict_mask=predict_mask)
mock_box_coder = test_utils.MockBoxCoder()
if use_keras:
fake_feature_extractor = FakeSSDKerasFeatureExtractor()
......@@ -182,7 +184,7 @@ class SSDMetaArchTestBase(test_case.TestCase):
if use_expected_classification_loss_under_sampling:
expected_classification_loss_under_sampling = functools.partial(
ops.expected_classification_loss_under_sampling,
minimum_negative_sampling=minimum_negative_sampling,
min_num_negative_samples=min_num_negative_samples,
desired_negative_sampling_ratio=desired_negative_sampling_ratio)
code_size = 4
......
......@@ -248,27 +248,30 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_boxes_batched,
detection_scores_batched,
detection_classes_batched,
num_det_boxes_per_image):
num_det_boxes_per_image,
is_annotated_batched):
"""Update operation for adding batch of images to Coco evaluator."""
for (image_id, gt_box, gt_class, gt_is_crowd, num_gt_box, det_box,
det_score, det_class, num_det_box) in zip(
det_score, det_class, num_det_box, is_annotated) in zip(
image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, groundtruth_is_crowd_batched,
num_gt_boxes_per_image,
detection_boxes_batched, detection_scores_batched,
detection_classes_batched, num_det_boxes_per_image):
self.add_single_ground_truth_image_info(
image_id, {
'groundtruth_boxes': gt_box[:num_gt_box],
'groundtruth_classes': gt_class[:num_gt_box],
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
})
self.add_single_detected_image_info(
image_id,
{'detection_boxes': det_box[:num_det_box],
'detection_scores': det_score[:num_det_box],
'detection_classes': det_class[:num_det_box]})
detection_classes_batched, num_det_boxes_per_image,
is_annotated_batched):
if is_annotated:
self.add_single_ground_truth_image_info(
image_id, {
'groundtruth_boxes': gt_box[:num_gt_box],
'groundtruth_classes': gt_class[:num_gt_box],
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
})
self.add_single_detected_image_info(
image_id,
{'detection_boxes': det_box[:num_det_box],
'detection_scores': det_score[:num_det_box],
'detection_classes': det_class[:num_det_box]})
# Unpack items from the evaluation dictionary.
input_data_fields = standard_fields.InputDataFields
......@@ -284,6 +287,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
num_gt_boxes_per_image = eval_dict.get(
'num_groundtruth_boxes_per_image', None)
num_det_boxes_per_image = eval_dict.get('num_det_boxes_per_image', None)
is_annotated = eval_dict.get('is_annotated', None)
if groundtruth_is_crowd is None:
groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
......@@ -306,6 +310,11 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
num_det_boxes_per_image = tf.shape(detection_boxes)[1:2]
else:
num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0)
if is_annotated is None:
is_annotated = tf.constant([True])
else:
is_annotated = tf.expand_dims(is_annotated, 0)
else:
if num_gt_boxes_per_image is None:
num_gt_boxes_per_image = tf.tile(
......@@ -315,6 +324,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
num_det_boxes_per_image = tf.tile(
tf.shape(detection_boxes)[1:2],
multiples=tf.shape(detection_boxes)[0:1])
if is_annotated is None:
is_annotated = tf.ones_like(image_id, dtype=tf.bool)
update_op = tf.py_func(update_op, [image_id,
groundtruth_boxes,
......@@ -324,7 +335,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_boxes,
detection_scores,
detection_classes,
num_det_boxes_per_image], [])
num_det_boxes_per_image,
is_annotated], [])
metric_names = ['DetectionBoxes_Precision/mAP',
'DetectionBoxes_Precision/mAP@.50IOU',
'DetectionBoxes_Precision/mAP@.75IOU',
......@@ -581,8 +593,11 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
Args:
eval_dict: A dictionary that holds tensors for evaluating object detection
performance. This dictionary may be produced from
eval_util.result_dict_for_single_example().
performance. For single-image evaluation, this dictionary may be
produced from eval_util.result_dict_for_single_example(). For multi-image
evaluation, `eval_dict` should contain the fields
'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' to
properly unpad the tensors from the batch.
Returns:
a dictionary of metric names to tuple of value_op and update_op that can
......@@ -590,27 +605,41 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
update ops must be run together and similarly all value ops must be run
together to guarantee correct behaviour.
"""
def update_op(
image_id,
groundtruth_boxes,
groundtruth_classes,
groundtruth_instance_masks,
groundtruth_is_crowd,
detection_scores,
detection_classes,
detection_masks):
def update_op(image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched,
groundtruth_instance_masks_batched,
groundtruth_is_crowd_batched, num_gt_boxes_per_image,
detection_scores_batched, detection_classes_batched,
detection_masks_batched, num_det_boxes_per_image):
"""Update op for metrics."""
self.add_single_ground_truth_image_info(
image_id,
{'groundtruth_boxes': groundtruth_boxes,
'groundtruth_classes': groundtruth_classes,
'groundtruth_instance_masks': groundtruth_instance_masks,
'groundtruth_is_crowd': groundtruth_is_crowd})
self.add_single_detected_image_info(
image_id,
{'detection_scores': detection_scores,
'detection_classes': detection_classes,
'detection_masks': detection_masks})
for (image_id, groundtruth_boxes, groundtruth_classes,
groundtruth_instance_masks, groundtruth_is_crowd, num_gt_box,
detection_scores, detection_classes,
detection_masks, num_det_box) in zip(
image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, groundtruth_instance_masks_batched,
groundtruth_is_crowd_batched, num_gt_boxes_per_image,
detection_scores_batched, detection_classes_batched,
detection_masks_batched, num_det_boxes_per_image):
self.add_single_ground_truth_image_info(
image_id, {
'groundtruth_boxes':
groundtruth_boxes[:num_gt_box],
'groundtruth_classes':
groundtruth_classes[:num_gt_box],
'groundtruth_instance_masks':
groundtruth_instance_masks[:num_gt_box],
'groundtruth_is_crowd':
groundtruth_is_crowd[:num_gt_box]
})
self.add_single_detected_image_info(
image_id, {
'detection_scores': detection_scores[:num_det_box],
'detection_classes': detection_classes[:num_det_box],
'detection_masks': detection_masks[:num_det_box]
})
# Unpack items from the evaluation dictionary.
input_data_fields = standard_fields.InputDataFields
......@@ -622,20 +651,54 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
input_data_fields.groundtruth_instance_masks]
groundtruth_is_crowd = eval_dict.get(
input_data_fields.groundtruth_is_crowd, None)
num_gt_boxes_per_image = eval_dict.get(
input_data_fields.num_groundtruth_boxes, None)
detection_scores = eval_dict[detection_fields.detection_scores]
detection_classes = eval_dict[detection_fields.detection_classes]
detection_masks = eval_dict[detection_fields.detection_masks]
num_det_boxes_per_image = eval_dict.get(detection_fields.num_detections,
None)
if groundtruth_is_crowd is None:
groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
update_op = tf.py_func(update_op, [image_id,
groundtruth_boxes,
groundtruth_classes,
groundtruth_instance_masks,
groundtruth_is_crowd,
detection_scores,
detection_classes,
detection_masks], [])
if not image_id.shape.as_list():
# Apply a batch dimension to all tensors.
image_id = tf.expand_dims(image_id, 0)
groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0)
groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
groundtruth_instance_masks = tf.expand_dims(groundtruth_instance_masks, 0)
groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
detection_scores = tf.expand_dims(detection_scores, 0)
detection_classes = tf.expand_dims(detection_classes, 0)
detection_masks = tf.expand_dims(detection_masks, 0)
if num_gt_boxes_per_image is None:
num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2]
else:
num_gt_boxes_per_image = tf.expand_dims(num_gt_boxes_per_image, 0)
if num_det_boxes_per_image is None:
num_det_boxes_per_image = tf.shape(detection_scores)[1:2]
else:
num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0)
else:
if num_gt_boxes_per_image is None:
num_gt_boxes_per_image = tf.tile(
tf.shape(groundtruth_boxes)[1:2],
multiples=tf.shape(groundtruth_boxes)[0:1])
if num_det_boxes_per_image is None:
num_det_boxes_per_image = tf.tile(
tf.shape(detection_scores)[1:2],
multiples=tf.shape(detection_scores)[0:1])
update_op = tf.py_func(update_op, [
image_id, groundtruth_boxes, groundtruth_classes,
groundtruth_instance_masks, groundtruth_is_crowd,
num_gt_boxes_per_image, detection_scores, detection_classes,
detection_masks, num_det_boxes_per_image
], [])
metric_names = ['DetectionMasks_Precision/mAP',
'DetectionMasks_Precision/mAP@.50IOU',
'DetectionMasks_Precision/mAP@.75IOU',
......
......@@ -308,6 +308,99 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsIsAnnotated(self):
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
image_id = tf.placeholder(tf.string, shape=())
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
is_annotated = tf.placeholder(tf.bool, shape=())
detection_boxes = tf.placeholder(tf.float32, shape=(None, 4))
detection_scores = tf.placeholder(tf.float32, shape=(None))
detection_classes = tf.placeholder(tf.float32, shape=(None))
input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields
eval_dict = {
input_data_fields.key: image_id,
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
'is_annotated': is_annotated,
detection_fields.detection_boxes: detection_boxes,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes
}
eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict)
_, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
with self.test_session() as sess:
sess.run(update_op,
feed_dict={
image_id: 'image1',
groundtruth_boxes: np.array([[100., 100., 200., 200.]]),
groundtruth_classes: np.array([1]),
is_annotated: True,
detection_boxes: np.array([[100., 100., 200., 200.]]),
detection_scores: np.array([.8]),
detection_classes: np.array([1])
})
sess.run(update_op,
feed_dict={
image_id: 'image2',
groundtruth_boxes: np.array([[50., 50., 100., 100.]]),
groundtruth_classes: np.array([3]),
is_annotated: True,
detection_boxes: np.array([[50., 50., 100., 100.]]),
detection_scores: np.array([.7]),
detection_classes: np.array([3])
})
sess.run(update_op,
feed_dict={
image_id: 'image3',
groundtruth_boxes: np.array([[25., 25., 50., 50.]]),
groundtruth_classes: np.array([2]),
is_annotated: True,
detection_boxes: np.array([[25., 25., 50., 50.]]),
detection_scores: np.array([.9]),
detection_classes: np.array([2])
})
sess.run(update_op,
feed_dict={
image_id: 'image4',
groundtruth_boxes: np.zeros((0, 4)),
groundtruth_classes: np.zeros((0)),
is_annotated: False, # Note that this image isn't annotated.
detection_boxes: np.array([[25., 25., 50., 50.],
[25., 25., 70., 50.],
[25., 25., 80., 50.],
[25., 25., 90., 50.]]),
detection_scores: np.array([0.6, 0.7, 0.8, 0.9]),
detection_classes: np.array([1, 2, 2, 3])
})
metrics = {}
for key, (value_op, _) in eval_metric_ops.iteritems():
metrics[key] = value_op
metrics = sess.run(metrics)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self):
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
......@@ -665,22 +758,40 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
_, update_op = eval_metric_ops['DetectionMasks_Precision/mAP']
with self.test_session() as sess:
sess.run(update_op,
feed_dict={
image_id: 'image1',
groundtruth_boxes: np.array([[100., 100., 200., 200.]]),
groundtruth_classes: np.array([1]),
groundtruth_masks: np.pad(np.ones([1, 100, 100],
dtype=np.uint8),
((0, 0), (10, 10), (10, 10)),
mode='constant'),
detection_scores: np.array([.8]),
detection_classes: np.array([1]),
detection_masks: np.pad(np.ones([1, 100, 100],
dtype=np.uint8),
((0, 0), (10, 10), (10, 10)),
mode='constant')
})
sess.run(
update_op,
feed_dict={
image_id:
'image1',
groundtruth_boxes:
np.array([[100., 100., 200., 200.], [50., 50., 100., 100.]]),
groundtruth_classes:
np.array([1, 2]),
groundtruth_masks:
np.stack([
np.pad(
np.ones([100, 100], dtype=np.uint8), ((10, 10),
(10, 10)),
mode='constant'),
np.pad(
np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant')
]),
detection_scores:
np.array([.9, .8]),
detection_classes:
np.array([2, 1]),
detection_masks:
np.stack([
np.pad(
np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant'),
np.pad(
np.ones([100, 100], dtype=np.uint8), ((10, 10),
(10, 10)),
mode='constant'),
])
})
sess.run(update_op,
feed_dict={
image_id: 'image2',
......@@ -735,6 +846,106 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._image_id_to_mask_shape_map)
self.assertFalse(coco_evaluator._detection_masks_list)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
coco_evaluator = coco_evaluation.CocoMaskEvaluator(_get_categories_list())
batch_size = 3
image_id = tf.placeholder(tf.string, shape=(batch_size))
groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
groundtruth_masks = tf.placeholder(
tf.uint8, shape=(batch_size, None, None, None))
detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None))
detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
detection_masks = tf.placeholder(
tf.uint8, shape=(batch_size, None, None, None))
input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields
eval_dict = {
input_data_fields.key: image_id,
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks,
}
eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict)
_, update_op = eval_metric_ops['DetectionMasks_Precision/mAP']
with self.test_session() as sess:
sess.run(
update_op,
feed_dict={
image_id: ['image1', 'image2', 'image3'],
groundtruth_boxes:
np.array([[[100., 100., 200., 200.]],
[[50., 50., 100., 100.]],
[[25., 25., 50., 50.]]]),
groundtruth_classes:
np.array([[1], [1], [1]]),
groundtruth_masks:
np.stack([
np.pad(
np.ones([1, 100, 100], dtype=np.uint8),
((0, 0), (0, 0), (0, 0)),
mode='constant'),
np.pad(
np.ones([1, 50, 50], dtype=np.uint8),
((0, 0), (25, 25), (25, 25)),
mode='constant'),
np.pad(
np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (37, 38), (37, 38)),
mode='constant')
],
axis=0),
detection_scores:
np.array([[.8], [.8], [.8]]),
detection_classes:
np.array([[1], [1], [1]]),
detection_masks:
np.stack([
np.pad(
np.ones([1, 100, 100], dtype=np.uint8),
((0, 0), (0, 0), (0, 0)),
mode='constant'),
np.pad(
np.ones([1, 50, 50], dtype=np.uint8),
((0, 0), (25, 25), (25, 25)),
mode='constant'),
np.pad(
np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (37, 38), (37, 38)),
mode='constant')
],
axis=0)
})
metrics = {}
for key, (value_op, _) in eval_metric_ops.iteritems():
metrics[key] = value_op
metrics = sess.run(metrics)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.50IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@1'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._image_ids_with_detections)
self.assertFalse(coco_evaluator._image_id_to_mask_shape_map)
self.assertFalse(coco_evaluator._detection_masks_list)
if __name__ == '__main__':
tf.test.main()
......@@ -25,6 +25,7 @@ import os
import tensorflow as tf
from object_detection import eval_util
from object_detection import exporter as exporter_lib
from object_detection import inputs
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
......@@ -306,8 +307,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
prediction_dict, features[fields.InputDataFields.true_image_shape])
losses = [loss_tensor for loss_tensor in losses_dict.values()]
if train_config.add_regularization_loss:
regularization_losses = tf.get_collection(
tf.GraphKeys.REGULARIZATION_LOSSES)
regularization_losses = detection_model.regularization_losses()
if regularization_losses:
regularization_loss = tf.add_n(
regularization_losses, name='regularization_loss')
......@@ -353,20 +353,24 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
for var in optimizer_summary_vars:
tf.summary.scalar(var.op.name, var)
summaries = [] if use_tpu else None
if train_config.summarize_gradients:
summaries = ['gradients', 'gradient_norm', 'global_gradient_norm']
train_op = tf.contrib.layers.optimize_loss(
loss=total_loss,
global_step=global_step,
learning_rate=None,
clip_gradients=clip_gradients_value,
optimizer=training_optimizer,
update_ops=detection_model.updates(),
variables=trainable_variables,
summaries=summaries,
name='') # Preventing scope prefix on all variables.
if mode == tf.estimator.ModeKeys.PREDICT:
exported_output = exporter_lib.add_output_tensor_nodes(detections)
export_outputs = {
tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
tf.estimator.export.PredictOutput(detections)
tf.estimator.export.PredictOutput(exported_output)
}
eval_metric_ops = None
......@@ -456,6 +460,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
def create_estimator_and_inputs(run_config,
hparams,
pipeline_config_path,
config_override=None,
train_steps=None,
sample_1_of_n_eval_examples=1,
sample_1_of_n_eval_on_train_examples=1,
......@@ -465,6 +470,7 @@ def create_estimator_and_inputs(run_config,
num_shards=1,
params=None,
override_eval_num_epochs=True,
save_final_config=False,
**kwargs):
"""Creates `Estimator`, input functions, and steps.
......@@ -472,6 +478,8 @@ def create_estimator_and_inputs(run_config,
run_config: A `RunConfig`.
hparams: A `HParams`.
pipeline_config_path: A path to a pipeline config file.
config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
override the config from `pipeline_config_path`.
train_steps: Number of training steps. If None, the number of training steps
is set from the `TrainConfig` proto.
sample_1_of_n_eval_examples: Integer representing how often an eval example
......@@ -499,6 +507,8 @@ def create_estimator_and_inputs(run_config,
`use_tpu_estimator` is True.
override_eval_num_epochs: Whether to overwrite the number of epochs to
1 for eval_input.
save_final_config: Whether to save final config (obtained after applying
overrides) to `estimator.model_dir`.
**kwargs: Additional keyword arguments for configuration override.
Returns:
......@@ -522,7 +532,8 @@ def create_estimator_and_inputs(run_config,
create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn']
create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn']
configs = get_configs_from_pipeline_file(pipeline_config_path)
configs = get_configs_from_pipeline_file(pipeline_config_path,
config_override=config_override)
kwargs.update({
'train_steps': train_steps,
'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
......@@ -595,7 +606,7 @@ def create_estimator_and_inputs(run_config,
estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
# Write the as-run pipeline config to disk.
if run_config.is_chief:
if run_config.is_chief and save_final_config:
pipeline_config_final = create_pipeline_proto_from_configs(configs)
config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir)
......@@ -641,11 +652,17 @@ def create_train_and_eval_specs(train_input_fn,
input_fn=train_input_fn, max_steps=train_steps)
if eval_spec_names is None:
eval_spec_names = [ str(i) for i in range(len(eval_input_fns)) ]
eval_spec_names = [str(i) for i in range(len(eval_input_fns))]
eval_specs = []
for eval_spec_name, eval_input_fn in zip(eval_spec_names, eval_input_fns):
exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
for index, (eval_spec_name, eval_input_fn) in enumerate(
zip(eval_spec_names, eval_input_fns)):
# Uses final_exporter_name as exporter_name for the first eval spec for
# backward compatibility.
if index == 0:
exporter_name = final_exporter_name
else:
exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
exporter = tf.estimator.FinalExporter(
name=exporter_name, serving_input_receiver_fn=predict_input_fn)
eval_specs.append(
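
# Illustrative sketch (not part of this diff): the exporter names this loop
# produces for the two eval specs exercised in the test further below.
final_exporter_name = 'exporter'
eval_spec_names = ['holdout', 'eval_on_train']
names = [final_exporter_name if index == 0
         else '{}_{}'.format(final_exporter_name, name)
         for index, name in enumerate(eval_spec_names)]
print(names)  # ['exporter', 'exporter_eval_on_train']
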
......@@ -747,6 +764,7 @@ def populate_experiment(run_config,
train_steps=train_steps,
eval_steps=eval_steps,
model_fn_creator=model_fn_creator,
save_final_config=True,
**kwargs)
estimator = train_and_eval_dict['estimator']
train_input_fn = train_and_eval_dict['train_input_fn']
......
......@@ -310,7 +310,7 @@ class ModelLibTest(tf.test.TestCase):
self.assertEqual(2, len(eval_specs))
self.assertEqual(None, eval_specs[0].steps)
self.assertEqual('holdout', eval_specs[0].name)
self.assertEqual('exporter_holdout', eval_specs[0].exporters[0].name)
self.assertEqual('exporter', eval_specs[0].exporters[0].name)
self.assertEqual(None, eval_specs[1].steps)
self.assertEqual('eval_on_train', eval_specs[1].name)
......
......@@ -114,6 +114,7 @@ def main(unused_argv):
use_tpu_estimator=True,
use_tpu=FLAGS.use_tpu,
num_shards=FLAGS.num_shards,
save_final_config=FLAGS.mode == 'train',
**kwargs)
estimator = train_and_eval_dict['estimator']
train_input_fn = train_and_eval_dict['train_input_fn']
......
......@@ -72,6 +72,8 @@ class FasterRCNNResnetV1FeatureExtractor(
VGG style channel mean subtraction as described here:
https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
Note that if the number of channels is not equal to 3, the mean subtraction
will be skipped and the original resized_inputs will be returned.
Args:
resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
......@@ -82,8 +84,11 @@ class FasterRCNNResnetV1FeatureExtractor(
tensor representing a batch of images.
"""
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
if resized_inputs.shape.as_list()[3] == 3:
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
else:
return resized_inputs
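
# Illustrative sketch (not part of this diff): the broadcasted subtraction
# on a constant-valued toy image.
import tensorflow as tf

resized_inputs = tf.fill([1, 2, 2, 3], 150.0)
channel_means = [123.68, 116.779, 103.939]
preprocessed = resized_inputs - [[channel_means]]  # broadcasts over H and W
with tf.Session() as sess:
  print(sess.run(preprocessed)[0, 0, 0])  # [26.32  33.221 46.061]
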
def _extract_proposal_features(self, preprocessed_inputs, scope):
"""Extracts first stage RPN features.
......
......@@ -146,7 +146,6 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
use_depthwise = feature_map_layout['use_depthwise']
for index, from_layer in enumerate(feature_map_layout['from_layer']):
net = []
self.convolutions.append(net)
layer_depth = feature_map_layout['layer_depth'][index]
conv_kernel_size = 3
if 'conv_kernel_size' in feature_map_layout:
......@@ -231,6 +230,10 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
conv_hyperparams.build_activation_layer(
name=layer_name))
# Until certain bugs are fixed in checkpointable lists,
# this net must be appended only after it has been filled with layers.
self.convolutions.append(net)
def call(self, image_features):
"""Generate the multi-resolution feature maps.
......@@ -263,7 +266,8 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
min_depth, insert_1x1_conv, image_features):
min_depth, insert_1x1_conv, image_features,
pool_residual=False):
"""Generates multi resolution feature maps from input image features.
Generates multi-scale feature maps for detection as in the SSD papers by
......@@ -317,6 +321,13 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
should be inserted before shrinking the feature map.
image_features: A dictionary of handles to activation tensors from the
base feature extractor.
pool_residual: Whether to add an average pooling layer followed by a
residual connection between subsequent feature maps when the channel
depths match. For example, with option 'layer_depth': [-1, 512, 256, 256],
a pooling and residual layer is added between the third and fourth feature
maps. This option is best used with the Weight Shared Convolution Box
Predictor when all feature maps have the same channel depth, to encourage
more consistent features across multi-scale feature maps.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
......@@ -350,6 +361,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
feature_map_keys.append(from_layer)
else:
pre_layer = feature_maps[-1]
pre_layer_depth = pre_layer.get_shape().as_list()[3]
intermediate_layer = pre_layer
if insert_1x1_conv:
layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
......@@ -383,6 +395,12 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
padding='SAME',
stride=1,
scope=layer_name)
if pool_residual and pre_layer_depth == depth_fn(layer_depth):
feature_map += slim.avg_pool2d(
pre_layer, [3, 3],
padding='SAME',
stride=2,
scope=layer_name + '_pool')
else:
feature_map = slim.conv2d(
intermediate_layer,
......@@ -399,6 +417,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
def fpn_top_down_feature_maps(image_features,
depth,
use_depthwise=False,
use_explicit_padding=False,
scope=None):
"""Generates `top-down` feature maps for Feature Pyramid Networks.
......@@ -409,7 +428,9 @@ def fpn_top_down_feature_maps(image_features,
Spatial resolutions of successive tensors must reduce exactly by a factor
of 2.
depth: depth of output feature maps.
use_depthwise: use depthwise separable conv instead of regular conv.
use_depthwise: whether to use depthwise separable conv instead of regular
conv.
use_explicit_padding: whether to use explicit padding.
scope: A scope name to wrap this op under.
Returns:
......@@ -420,8 +441,10 @@ def fpn_top_down_feature_maps(image_features,
num_levels = len(image_features)
output_feature_maps_list = []
output_feature_map_keys = []
padding = 'VALID' if use_explicit_padding else 'SAME'
kernel_size = 3
with slim.arg_scope(
[slim.conv2d, slim.separable_conv2d], padding='SAME', stride=1):
[slim.conv2d, slim.separable_conv2d], padding=padding, stride=1):
top_down = slim.conv2d(
image_features[-1][1],
depth, [1, 1], activation_fn=None, normalizer_fn=None,
......@@ -436,14 +459,20 @@ def fpn_top_down_feature_maps(image_features,
image_features[level][1], depth, [1, 1],
activation_fn=None, normalizer_fn=None,
scope='projection_%d' % (level + 1))
if use_explicit_padding:
# Slice top_down to the same shape as residual.
residual_shape = tf.shape(residual)
top_down = top_down[:, :residual_shape[1], :residual_shape[2], :]
top_down += residual
if use_depthwise:
conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1)
else:
conv_op = slim.conv2d
if use_explicit_padding:
top_down = ops.fixed_padding(top_down, kernel_size)
output_feature_maps_list.append(conv_op(
top_down,
depth, [3, 3],
depth, [kernel_size, kernel_size],
scope='smoothing_%d' % (level + 1)))
output_feature_map_keys.append('top_down_%s' % image_features[level][0])
return collections.OrderedDict(reversed(
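
# Illustrative sketch (not part of this diff): ops.fixed_padding pre-pads by
# kernel_size - 1 so a 'VALID' convolution reproduces 'SAME' output sizes,
# which is what use_explicit_padding switches on.
import tensorflow as tf
from object_detection.utils import ops

inputs = tf.zeros([1, 8, 8, 16])
padded = ops.fixed_padding(inputs, kernel_size=3)           # [1, 10, 10, 16]
outputs = tf.layers.conv2d(padded, 16, 3, padding='valid')
print(outputs.shape)  # (1, 8, 8, 16), spatially matching 'SAME' padding
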
......
......@@ -45,6 +45,11 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
'conv_kernel_size': [-1, -1, 3, 3, 2],
}
SSD_MOBILENET_V1_WEIGHT_SHARED_LAYOUT = {
'from_layer': ['Conv2d_13_pointwise', '', '', ''],
'layer_depth': [-1, 256, 256, 256],
}
@parameterized.parameters(
{'use_keras': False},
......@@ -67,7 +72,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _build_feature_map_generator(self, feature_map_layout, use_keras):
def _build_feature_map_generator(self, feature_map_layout, use_keras,
pool_residual=False):
if use_keras:
return feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=feature_map_layout,
......@@ -86,7 +92,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
image_features=image_features,
pool_residual=pool_residual)
return feature_map_generator
def test_get_expected_feature_map_shapes_with_inception_v2(self, use_keras):
......@@ -209,6 +216,34 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_feature_map_shapes_with_pool_residual_ssd_mobilenet_v1(
self, use_keras):
image_features = {
'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024],
dtype=tf.float32),
}
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=SSD_MOBILENET_V1_WEIGHT_SHARED_LAYOUT,
use_keras=use_keras,
pool_residual=True
)
feature_maps = feature_map_generator(image_features)
expected_feature_map_shapes = {
'Conv2d_13_pointwise': (4, 8, 8, 1024),
'Conv2d_13_pointwise_2_Conv2d_1_3x3_s2_256': (4, 4, 4, 256),
'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_256': (4, 2, 2, 256),
'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 1, 1, 256)}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_variable_names_with_inception_v2(self, use_keras):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
......
......@@ -82,6 +82,8 @@ class _LayersOverride(object):
self._conv_hyperparams = conv_hyperparams
self._use_explicit_padding = use_explicit_padding
self._min_depth = min_depth
self.regularizer = tf.keras.regularizers.l2(0.00004 * 0.5)
self.initializer = tf.truncated_normal_initializer(stddev=0.09)
def _FixedPaddingLayer(self, kernel_size):
return tf.keras.layers.Lambda(lambda x: ops.fixed_padding(x, kernel_size))
......@@ -114,6 +116,9 @@ class _LayersOverride(object):
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
else:
kwargs['kernel_regularizer'] = self.regularizer
kwargs['kernel_initializer'] = self.initializer
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
......@@ -144,6 +149,8 @@ class _LayersOverride(object):
"""
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
else:
kwargs['depthwise_initializer'] = self.initializer
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
......
......@@ -31,11 +31,10 @@ slim = tf.contrib.slim
# A modified config of mobilenet v1 that makes it more detection friendly.
def _create_modified_mobilenet_config():
conv_defs = copy.copy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
conv_defs = copy.deepcopy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
conv_defs[-2] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=512)
conv_defs[-1] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=256)
return conv_defs
_CONV_DEFS = _create_modified_mobilenet_config()
class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
......@@ -98,6 +97,9 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
self._fpn_min_level = fpn_min_level
self._fpn_max_level = fpn_max_level
self._additional_layer_depth = additional_layer_depth
self._conv_defs = None
if self._use_depthwise:
self._conv_defs = _create_modified_mobilenet_config()
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -141,7 +143,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
conv_defs=_CONV_DEFS if self._use_depthwise else None,
conv_defs=self._conv_defs,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
......@@ -159,7 +161,8 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
fpn_features = feature_map_generators.fpn_top_down_feature_maps(
[(key, image_features[key]) for key in feature_block_list],
depth=depth_fn(self._additional_layer_depth),
use_depthwise=self._use_depthwise)
use_depthwise=self._use_depthwise,
use_explicit_padding=self._use_explicit_padding)
feature_maps = []
for level in range(self._fpn_min_level, base_fpn_max_level + 1):
feature_maps.append(fpn_features['top_down_{}'.format(
......@@ -167,18 +170,23 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
last_feature_map = fpn_features['top_down_{}'.format(
feature_blocks[base_fpn_max_level - 2])]
# Construct coarse features
padding = 'VALID' if self._use_explicit_padding else 'SAME'
kernel_size = 3
for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
if self._use_depthwise:
conv_op = functools.partial(
slim.separable_conv2d, depth_multiplier=1)
else:
conv_op = slim.conv2d
if self._use_explicit_padding:
last_feature_map = ops.fixed_padding(
last_feature_map, kernel_size)
last_feature_map = conv_op(
last_feature_map,
num_outputs=depth_fn(self._additional_layer_depth),
kernel_size=[3, 3],
kernel_size=[kernel_size, kernel_size],
stride=2,
padding='SAME',
padding=padding,
scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13))
feature_maps.append(last_feature_map)
return feature_maps
......@@ -30,17 +30,14 @@ from nets.mobilenet import mobilenet_v2
slim = tf.contrib.slim
# A modified config of mobilenet v2 that makes it more detection friendly,
# A modified config of mobilenet v2 that makes it more detection friendly.
def _create_modified_mobilenet_config():
conv_defs = copy.copy(mobilenet_v2.V2_DEF)
conv_defs = copy.deepcopy(mobilenet_v2.V2_DEF)
conv_defs['spec'][-1] = mobilenet.op(
slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=256)
return conv_defs
_CONV_DEFS = _create_modified_mobilenet_config()
class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using MobilenetV2 FPN features."""
......@@ -100,6 +97,9 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
self._fpn_min_level = fpn_min_level
self._fpn_max_level = fpn_max_level
self._additional_layer_depth = additional_layer_depth
self._conv_defs = None
if self._use_depthwise:
self._conv_defs = _create_modified_mobilenet_config()
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -142,7 +142,7 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='layer_19',
depth_multiplier=self._depth_multiplier,
conv_defs=_CONV_DEFS if self._use_depthwise else None,
conv_defs=self._conv_defs,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
......@@ -158,7 +158,8 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
fpn_features = feature_map_generators.fpn_top_down_feature_maps(
[(key, image_features[key]) for key in feature_block_list],
depth=depth_fn(self._additional_layer_depth),
use_depthwise=self._use_depthwise)
use_depthwise=self._use_depthwise,
use_explicit_padding=self._use_explicit_padding)
feature_maps = []
for level in range(self._fpn_min_level, base_fpn_max_level + 1):
feature_maps.append(fpn_features['top_down_{}'.format(
......@@ -166,18 +167,23 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
last_feature_map = fpn_features['top_down_{}'.format(
feature_blocks[base_fpn_max_level - 2])]
# Construct coarse features
padding = 'VALID' if self._use_explicit_padding else 'SAME'
kernel_size = 3
for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
if self._use_depthwise:
conv_op = functools.partial(
slim.separable_conv2d, depth_multiplier=1)
else:
conv_op = slim.conv2d
if self._use_explicit_padding:
last_feature_map = ops.fixed_padding(
last_feature_map, kernel_size)
last_feature_map = conv_op(
last_feature_map,
num_outputs=depth_fn(self._additional_layer_depth),
kernel_size=[3, 3],
kernel_size=[kernel_size, kernel_size],
stride=2,
padding='SAME',
padding=padding,
scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19))
feature_maps.append(last_feature_map)
return feature_maps
......@@ -85,41 +85,44 @@ class SSDMobileNetV2KerasFeatureExtractor(
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
feature_map_layout = {
self._feature_map_layout = {
'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_depthwise': self._use_depthwise,
'use_explicit_padding': self._use_explicit_padding,
}
with tf.name_scope('MobilenetV2'):
full_mobilenet_v2 = mobilenet_v2.mobilenet_v2(
batchnorm_training=(is_training and not freeze_batchnorm),
conv_hyperparams=(conv_hyperparams
if self._override_base_feature_extractor_hyperparams
else None),
weights=None,
use_explicit_padding=use_explicit_padding,
alpha=self._depth_multiplier,
min_depth=self._min_depth,
include_top=False)
conv2d_11_pointwise = full_mobilenet_v2.get_layer(
name='block_13_expand_relu').output
conv2d_13_pointwise = full_mobilenet_v2.get_layer(name='out_relu').output
self.mobilenet_v2 = tf.keras.Model(
inputs=full_mobilenet_v2.inputs,
outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
self.feature_map_generator = (
feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
is_training=is_training,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
name='FeatureMaps'))
self.mobilenet_v2 = None
self.feature_map_generator = None
def build(self, input_shape):
full_mobilenet_v2 = mobilenet_v2.mobilenet_v2(
batchnorm_training=(self._is_training and not self._freeze_batchnorm),
conv_hyperparams=(self._conv_hyperparams
if self._override_base_feature_extractor_hyperparams
else None),
weights=None,
use_explicit_padding=self._use_explicit_padding,
alpha=self._depth_multiplier,
min_depth=self._min_depth,
include_top=False)
conv2d_11_pointwise = full_mobilenet_v2.get_layer(
name='block_13_expand_relu').output
conv2d_13_pointwise = full_mobilenet_v2.get_layer(name='out_relu').output
self.mobilenet_v2 = tf.keras.Model(
inputs=full_mobilenet_v2.inputs,
outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
self.feature_map_generator = (
feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=self._feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
is_training=self._is_training,
conv_hyperparams=self._conv_hyperparams,
freeze_batchnorm=self._freeze_batchnorm,
name='FeatureMaps'))
self.built = True
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for PNASNet features.
Based on PNASNet ImageNet model: https://arxiv.org/abs/1712.00559
"""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from nets.nasnet import pnasnet
slim = tf.contrib.slim
def pnasnet_large_arg_scope_for_detection(is_batch_norm_training=False):
"""Defines the default arg scope for the PNASNet Large for object detection.
This provides a small edit to switch batch norm training on and off.
Args:
is_batch_norm_training: Boolean indicating whether to train with batch norm.
Default is False.
Returns:
An `arg_scope` to use for the PNASNet Large Model.
"""
imagenet_scope = pnasnet.pnasnet_large_arg_scope()
with slim.arg_scope(imagenet_scope):
with slim.arg_scope([slim.batch_norm],
is_training=is_batch_norm_training) as sc:
return sc
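# Usage sketch (mirrors extract_features below; `images` is an illustrative
# placeholder tensor): wrapping graph construction in this scope makes the
# batch norm layers follow the given training flag.
#
#   with slim.arg_scope(pnasnet_large_arg_scope_for_detection(
#       is_batch_norm_training=True)):
#     _, end_points = pnasnet.build_pnasnet_large(
#         images, num_classes=None, is_training=True,
#         final_endpoint='Cell_11')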
class SSDPNASNetFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using PNASNet features."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""PNASNet Feature Extractor for SSD Models.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_depthwise: Whether to use depthwise convolutions.
      override_base_feature_extractor_hyperparams: Whether to override
        hyperparameters of the base feature extractor with the ones from
        `conv_hyperparams_fn`.
"""
super(SSDPNASNetFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams_fn=conv_hyperparams_fn,
reuse_weights=reuse_weights,
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
feature_map_layout = {
'from_layer': ['Cell_7', 'Cell_11', '', '', '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
}
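    # In this layout, 'Cell_7' and 'Cell_11' are taken directly from the
    # PNASNet endpoints (a layer_depth of -1 keeps each endpoint's native
    # depth), while the empty 'from_layer' entries ask the feature map
    # generator to build four additional maps of depths 512, 256, 256 and
    # 128 on top of the last endpoint.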
with slim.arg_scope(
pnasnet_large_arg_scope_for_detection(
is_batch_norm_training=self._is_training)):
with slim.arg_scope([slim.conv2d, slim.batch_norm, slim.separable_conv2d],
reuse=self._reuse_weights):
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams else
context_manager.IdentityContextManager()):
_, image_features = pnasnet.build_pnasnet_large(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
num_classes=None,
is_training=self._is_training,
final_endpoint='Cell_11')
with tf.variable_scope('SSD_feature_maps', reuse=self._reuse_weights):
with slim.arg_scope(self._conv_hyperparams_fn()):
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Note that this overrides the default implementation in
ssd_meta_arch.SSDFeatureExtractor which does not work for PNASNet
checkpoints.
Args:
      feature_extractor_scope: A scope name for the feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in tf.global_variables():
if variable.op.name.startswith(feature_extractor_scope):
var_name = variable.op.name.replace(feature_extractor_scope + '/', '')
var_name += '/ExponentialMovingAverage'
variables_to_restore[var_name] = variable
return variables_to_restore
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for ssd_pnas_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_pnasnet_feature_extractor
slim = tf.contrib.slim
class SsdPnasNetFeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, use_explicit_padding=False):
"""Constructs a new feature extractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
is_training: whether the network is in training mode.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
return ssd_pnasnet_feature_extractor.SSDPNASNetFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 8, 8, 2160), (2, 4, 4, 4320),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 19, 19, 2160), (2, 10, 10, 4320),
(2, 5, 5, 512), (2, 3, 3, 256),
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(2, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
if __name__ == '__main__':
tf.test.main()
......@@ -113,6 +113,8 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
VGG style channel mean subtraction as described here:
    https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md.
Note that if the number of channels is not equal to 3, the mean subtraction
will be skipped and the original resized_inputs will be returned.
Args:
resized_inputs: a [batch, height, width, channels] float tensor
......@@ -122,8 +124,11 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
if resized_inputs.shape.as_list()[3] == 3:
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
else:
return resized_inputs
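    # For example, with the channel means above, a mid-gray pixel
    # [128, 128, 128] becomes approximately [4.32, 11.22, 24.06].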
def _filter_features(self, image_features):
# TODO(rathodv): Change resnet endpoint to strip scope prefixes instead
......
......@@ -82,12 +82,15 @@ class SSDResnetFPNFeatureExtractorTestBase(
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3)
test_image = tf.constant(np.random.rand(4, image_height, image_width, 3))
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertAllClose(preprocessed_image,
test_image - [[123.68, 116.779, 103.939]])
with self.test_session() as sess:
test_image_out, preprocessed_image_out = sess.run(
[test_image, preprocessed_image])
self.assertAllClose(preprocessed_image_out,
test_image_out - [[123.68, 116.779, 103.939]])
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
......@@ -103,5 +106,3 @@ class SSDResnetFPNFeatureExtractorTestBase(
self.assertTrue(
variable.name.startswith(self._resnet_scope_name())
or variable.name.startswith(self._fpn_scope_name()))
......@@ -98,6 +98,8 @@ class _SSDResnetPpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
VGG style channel mean subtraction as described here:
    https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md.
Note that if the number of channels is not equal to 3, the mean subtraction
will be skipped and the original resized_inputs will be returned.
Args:
resized_inputs: a [batch, height, width, channels] float tensor
......@@ -107,8 +109,11 @@ class _SSDResnetPpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
if resized_inputs.shape.as_list()[3] == 3:
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
else:
return resized_inputs
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
......