Commit e00e0e13 authored by dreamdragon

Merge remote-tracking branch 'upstream/master'

parents b915db4e 402b561b
......@@ -124,6 +124,8 @@ def transform_input_data(tensor_dict,
if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
_, resized_masks, _ = image_resizer_fn(image, masks)
if use_bfloat16:
resized_masks = tf.cast(resized_masks, tf.bfloat16)
tensor_dict[fields.InputDataFields.
groundtruth_instance_masks] = resized_masks
......@@ -161,6 +163,9 @@ def transform_input_data(tensor_dict,
tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
merged_confidences)
if fields.InputDataFields.groundtruth_boxes in tensor_dict:
tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
return tensor_dict
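For context (an editor's sketch, not part of the diff): the new num_groundtruth_boxes entry simply records tf.shape(boxes)[0], so downstream consumers such as the batched COCO evaluator later in this change can slice padded groundtruth tensors back to their true length. Variable names below are illustrative only.

# Hypothetical illustration of consuming the new count to unpad groundtruth.
num_boxes = tensor_dict[fields.InputDataFields.num_groundtruth_boxes]
padded_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
unpadded_boxes = padded_boxes[:num_boxes]  # drops any zero-padded rows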
......@@ -282,12 +287,9 @@ def augment_input_data(tensor_dict, data_augmentation_options):
in tensor_dict)
include_keypoints = (fields.InputDataFields.groundtruth_keypoints
in tensor_dict)
include_label_scores = (fields.InputDataFields.groundtruth_confidences in
tensor_dict)
tensor_dict = preprocessor.preprocess(
tensor_dict, data_augmentation_options,
func_arg_map=preprocessor.get_default_func_arg_map(
include_label_scores=include_label_scores,
include_instance_masks=include_instance_masks,
include_keypoints=include_keypoints))
tensor_dict[fields.InputDataFields.image] = tf.squeeze(
......
......@@ -630,6 +630,9 @@ class DataTransformationFnTest(test_case.TestCase):
self.assertAllClose(
transformed_inputs[fields.InputDataFields.groundtruth_confidences],
[[1, 0, 1]])
self.assertAllClose(
transformed_inputs[fields.InputDataFields.num_groundtruth_boxes],
1)
def test_returns_resized_masks(self):
tensor_dict = {
......
......@@ -160,6 +160,17 @@ class FakeDetectionModel(model.DetectionModel):
}
return loss_dict
def regularization_losses(self):
"""Returns a list of regularization losses for this model.
Returns a list of regularization losses for this model that the estimator
needs to use during training/optimization.
Returns:
A list of regularization loss tensors.
"""
pass
def restore_map(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of variables to load from a foreign checkpoint.
......@@ -174,6 +185,18 @@ class FakeDetectionModel(model.DetectionModel):
"""
return {var.op.name: var for var in tf.global_variables()}
def updates(self):
"""Returns a list of update operators for this model.
Returns a list of update operators for this model that must be executed at
each training step. The estimator's train op needs to have a control
dependency on these updates.
Returns:
A list of update operators.
"""
pass
class TrainerTest(tf.test.TestCase):
......
......@@ -662,7 +662,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
anchors_boxlist, clip_window)
else:
anchors_boxlist = box_list_ops.clip_to_window(
anchors_boxlist, clip_window)
anchors_boxlist, clip_window,
filter_nonoverlapping=not self._use_static_shapes)
self._anchors = anchors_boxlist
prediction_dict = {
......@@ -917,12 +918,14 @@ class FasterRCNNMetaArch(model.DetectionModel):
_, num_classes, mask_height, mask_width = (
detection_masks.get_shape().as_list())
_, max_detection = detection_classes.get_shape().as_list()
prediction_dict['mask_predictions'] = tf.reshape(
detection_masks, [-1, num_classes, mask_height, mask_width])
if num_classes > 1:
detection_masks = self._gather_instance_masks(
detection_masks, detection_classes)
prediction_dict[fields.DetectionResultFields.detection_masks] = (
tf.reshape(detection_masks,
tf.reshape(tf.sigmoid(detection_masks),
[batch_size, max_detection, mask_height, mask_width]))
return prediction_dict
......@@ -1159,9 +1162,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
}
# TODO(jrru): Remove mask_predictions from _post_process_box_classifier.
with tf.name_scope('SecondStagePostprocessor'):
if (self._number_of_stages == 2 or
(self._number_of_stages == 3 and self._is_training)):
if (self._number_of_stages == 2 or
(self._number_of_stages == 3 and self._is_training)):
with tf.name_scope('SecondStagePostprocessor'):
mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
detections_dict = self._postprocess_box_classifier(
prediction_dict['refined_box_encodings'],
......@@ -1170,18 +1173,53 @@ class FasterRCNNMetaArch(model.DetectionModel):
prediction_dict['num_proposals'],
true_image_shapes,
mask_predictions=mask_predictions)
return detections_dict
if 'rpn_features_to_crop' in prediction_dict and self._initial_crop_size:
self._add_detection_features_output_node(
detections_dict[fields.DetectionResultFields.detection_boxes],
prediction_dict['rpn_features_to_crop'])
return detections_dict
if self._number_of_stages == 3:
# Post processing is already performed in 3rd stage. We need to transfer
# postprocessed tensors from `prediction_dict` to `detections_dict`.
detections_dict = {}
for key in prediction_dict:
if key == fields.DetectionResultFields.detection_masks:
detections_dict[key] = tf.sigmoid(prediction_dict[key])
elif 'detection' in key:
detections_dict[key] = prediction_dict[key]
return detections_dict
return prediction_dict
def _add_detection_features_output_node(self, detection_boxes,
rpn_features_to_crop):
"""Add the detection features to the output node.
The detection features are from cropping rpn_features with boxes.
Each bounding box has one feature vector of length depth, which comes from
mean_pooling of the cropped rpn_features.
Args:
detection_boxes: a 3-D float32 tensor of shape
[batch_size, max_detection, 4] which represents the bounding boxes.
rpn_features_to_crop: A 4-D float32 tensor with shape
[batch, height, width, depth] representing image features to crop using
the proposals boxes.
"""
with tf.name_scope('SecondStageDetectionFeaturesExtract'):
flattened_detected_feature_maps = (
self._compute_second_stage_input_feature_maps(
rpn_features_to_crop, detection_boxes))
detection_features_unpooled = (
self._feature_extractor.extract_box_classifier_features(
flattened_detected_feature_maps,
scope=self.second_stage_feature_extractor_scope))
batch_size = tf.shape(detection_boxes)[0]
max_detection = tf.shape(detection_boxes)[1]
detection_features_pool = tf.reduce_mean(
detection_features_unpooled, axis=[1, 2])
detection_features = tf.reshape(
detection_features_pool,
[batch_size, max_detection, tf.shape(detection_features_pool)[-1]])
detection_features = tf.identity(
detection_features, 'detection_features')
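A shape-level summary of the helper above (hedged; depth' is whatever the second-stage feature extractor emits):

# rpn_features_to_crop:                  [batch, height, width, depth]
# detection_boxes:                       [batch, max_detection, 4]
# cropped + box-classifier features:     [batch * max_detection, h', w', depth']
# after mean pooling over axes [1, 2]:   [batch * max_detection, depth']
# final 'detection_features' output:     [batch, max_detection, depth']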
def _postprocess_rpn(self,
rpn_box_encodings_batch,
......@@ -1454,6 +1492,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
# to cls_weights. This could happen as boxes within certain IOU ranges
# are ignored. If triggered, the selected boxes will still be ignored
# during loss computation.
cls_weights = tf.reduce_mean(cls_weights, axis=-1)
positive_indicator = tf.greater(tf.argmax(cls_targets, axis=1), 0)
valid_indicator = tf.logical_and(
tf.range(proposal_boxlist.num_boxes()) < num_valid_proposals,
......@@ -1566,6 +1605,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
mask_predictions_batch = tf.reshape(
mask_predictions, [-1, self.max_num_proposals,
self.num_classes, mask_height, mask_width])
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, _,
num_detections) = self._second_stage_nms_fn(
refined_decoded_boxes_batch,
......@@ -1713,6 +1753,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
gt_box_batch=groundtruth_boxlists,
gt_class_targets_batch=(len(groundtruth_boxlists) * [None]),
gt_weights_batch=groundtruth_weights_list)
batch_cls_weights = tf.reduce_mean(batch_cls_weights, axis=2)
batch_cls_targets = tf.squeeze(batch_cls_targets, axis=2)
def _minibatch_subsample_fn(inputs):
......@@ -1743,7 +1784,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
losses_mask=losses_mask)
objectness_losses = self._first_stage_objectness_loss(
rpn_objectness_predictions_with_background,
batch_one_hot_targets, weights=batch_sampled_indices,
batch_one_hot_targets,
weights=tf.expand_dims(batch_sampled_indices, axis=-1),
losses_mask=losses_mask)
localization_loss = tf.reduce_mean(
tf.reduce_sum(localization_losses, axis=1) / normalizer)
......@@ -1960,25 +2002,28 @@ class FasterRCNNMetaArch(model.DetectionModel):
tf.expand_dims(flat_gt_masks, -1),
tf.expand_dims(flat_normalized_proposals, axis=1),
[mask_height, mask_width])
# Without stopping gradients into cropped groundtruth masks the
# performance with 100-padded groundtruth masks when batch size > 1 is
# about 4% worse.
# TODO(rathodv): Investigate this since we don't expect any variables
# upstream of flat_cropped_gt_mask.
flat_cropped_gt_mask = tf.stop_gradient(flat_cropped_gt_mask)
batch_cropped_gt_mask = tf.reshape(
flat_cropped_gt_mask,
[batch_size, -1, mask_height * mask_width])
second_stage_mask_losses = ops.reduce_sum_trailing_dimensions(
self._second_stage_mask_loss(
reshaped_prediction_masks,
batch_cropped_gt_mask,
weights=batch_mask_target_weights,
losses_mask=losses_mask),
ndims=2) / (
mask_height * mask_width * tf.maximum(
tf.reduce_sum(
batch_mask_target_weights, axis=1, keep_dims=True
), tf.ones((batch_size, 1))))
second_stage_mask_loss = tf.reduce_sum(
tf.where(paddings_indicator, second_stage_mask_losses,
tf.zeros_like(second_stage_mask_losses)))
mask_losses_weights = (
batch_mask_target_weights * tf.to_float(paddings_indicator))
mask_losses = self._second_stage_mask_loss(
reshaped_prediction_masks,
batch_cropped_gt_mask,
weights=tf.expand_dims(mask_losses_weights, axis=-1),
losses_mask=losses_mask)
total_mask_loss = tf.reduce_sum(mask_losses)
normalizer = tf.maximum(
tf.reduce_sum(mask_losses_weights * mask_height * mask_width), 1.0)
second_stage_mask_loss = total_mask_loss / normalizer
if second_stage_mask_loss is not None:
mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
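To make the new normalization concrete, here is a hedged numeric sketch (values are illustrative; 14x14 is the mask size used in the tests later in this diff). The per-pixel sigmoid losses are summed and divided by the number of valid mask pixels rather than by a per-proposal count.

# Illustrative numbers only, not from the diff.
mask_height = mask_width = 14
num_valid_masks = 3.0  # sum of mask_losses_weights over the whole batch
normalizer = max(num_valid_masks * mask_height * mask_width, 1.0)  # 588.0
# second_stage_mask_loss = total_mask_loss / normalizer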
......@@ -2073,6 +2118,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
cls_losses=tf.expand_dims(single_image_cls_loss, 0),
decoded_boxlist_list=[proposal_boxlist])
def regularization_losses(self):
"""Returns a list of regularization losses for this model.
Returns a list of regularization losses for this model that the estimator
needs to use during training/optimization.
Returns:
A list of regularization loss tensors.
"""
return tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
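As a rough illustration (not part of this change) of how an estimator-style training step might consume regularization_losses(), assuming optimizer, prediction_dict, and true_image_shapes are defined elsewhere:

# Hedged sketch: fold the regularization terms into the total training loss.
losses_dict = detection_model.loss(prediction_dict, true_image_shapes)
total_loss = tf.add_n(list(losses_dict.values()))
regularization_losses = detection_model.regularization_losses()
if regularization_losses:
  total_loss = tf.add_n([total_loss] + regularization_losses)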
def restore_map(self,
fine_tune_checkpoint_type='detection',
load_all_detection_checkpoint_vars=False):
......@@ -2117,3 +2173,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
feature_extractor_variables = tf.contrib.framework.filter_variables(
variables_to_restore, include_patterns=include_patterns)
return {var.op.name: var for var in feature_extractor_variables}
def updates(self):
"""Returns a list of update operators for this model.
Returns a list of update operators for this model that must be executed at
each training step. The estimator's train op needs to have a control
dependency on these updates.
Returns:
A list of update operators.
"""
return tf.get_collection(tf.GraphKeys.UPDATE_OPS)
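And a companion sketch for updates() (again an illustration, not part of the diff), showing the control dependency the docstring asks the estimator's train op to carry:

# Hedged sketch: give the train op a control dependency on model updates.
update_ops = detection_model.updates()
with tf.control_dependencies(update_ops):
  train_op = optimizer.minimize(total_loss,
                                global_step=tf.train.get_global_step())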
......@@ -189,7 +189,7 @@ class FasterRCNNMetaArchTest(
set(expected_shapes.keys()).union(
set([
'detection_boxes', 'detection_scores', 'detection_classes',
'detection_masks', 'num_detections'
'detection_masks', 'num_detections', 'mask_predictions',
])))
for key in expected_shapes:
self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
......@@ -199,6 +199,9 @@ class FasterRCNNMetaArchTest(
self.assertAllEqual(tensor_dict_out['detection_classes'].shape, [2, 5])
self.assertAllEqual(tensor_dict_out['detection_scores'].shape, [2, 5])
self.assertAllEqual(tensor_dict_out['num_detections'].shape, [2])
num_classes = 1 if masks_are_class_agnostic else 2
self.assertAllEqual(tensor_dict_out['mask_predictions'].shape,
[10, num_classes, 14, 14])
@parameterized.parameters(
{'masks_are_class_agnostic': False},
......
......@@ -250,6 +250,7 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
iou_threshold: 1.0
max_detections_per_class: 5
max_total_detections: 5
use_static_shapes: """ +'{}'.format(use_static_shapes) + """
}
"""
post_processing_config = post_processing_pb2.PostProcessing()
......@@ -336,61 +337,71 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
masks_are_class_agnostic=masks_are_class_agnostic), **common_kwargs)
def test_predict_gives_correct_shapes_in_inference_mode_first_stage_only(
self):
test_graph = tf.Graph()
with test_graph.as_default():
model = self._build_model(
is_training=False, number_of_stages=1, second_stage_batch_size=2)
batch_size = 2
height = 10
width = 12
input_image_shape = (batch_size, height, width, 3)
self, use_static_shapes=False):
batch_size = 2
height = 10
width = 12
input_image_shape = (batch_size, height, width, 3)
_, true_image_shapes = model.preprocess(tf.zeros(input_image_shape))
preprocessed_inputs = tf.placeholder(
dtype=tf.float32, shape=(batch_size, None, None, 3))
def graph_fn(images):
"""Function to construct tf graph for the test."""
model = self._build_model(
is_training=False,
number_of_stages=1,
second_stage_batch_size=2,
clip_anchors_to_image=use_static_shapes,
use_static_shapes=use_static_shapes)
preprocessed_inputs, true_image_shapes = model.preprocess(images)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
return (prediction_dict['rpn_box_predictor_features'],
prediction_dict['rpn_features_to_crop'],
prediction_dict['image_shape'],
prediction_dict['rpn_box_encodings'],
prediction_dict['rpn_objectness_predictions_with_background'],
prediction_dict['anchors'])
images = np.zeros(input_image_shape, dtype=np.float32)
# In inference mode, anchors are clipped to the image window, but not
# pruned. Since MockFasterRCNN.extract_proposal_features returns a
# tensor with the same shape as its input, the expected number of anchors
# is height * width * the number of anchors per location (i.e. 3x3).
expected_num_anchors = height * width * 3 * 3
expected_output_shapes = {
'rpn_box_predictor_features': (batch_size, height, width, 512),
'rpn_features_to_crop': (batch_size, height, width, 3),
'rpn_box_encodings': (batch_size, expected_num_anchors, 4),
'rpn_objectness_predictions_with_background':
(batch_size, expected_num_anchors, 2),
'anchors': (expected_num_anchors, 4)
}
# In inference mode, anchors are clipped to the image window, but not
# pruned. Since MockFasterRCNN.extract_proposal_features returns a
# tensor with the same shape as its input, the expected number of anchors
# is height * width * the number of anchors per location (i.e. 3x3).
expected_num_anchors = height * width * 3 * 3
expected_output_keys = set([
'rpn_box_predictor_features', 'rpn_features_to_crop', 'image_shape',
'rpn_box_encodings', 'rpn_objectness_predictions_with_background',
'anchors'])
expected_output_shapes = {
'rpn_box_predictor_features': (batch_size, height, width, 512),
'rpn_features_to_crop': (batch_size, height, width, 3),
'rpn_box_encodings': (batch_size, expected_num_anchors, 4),
'rpn_objectness_predictions_with_background':
(batch_size, expected_num_anchors, 2),
'anchors': (expected_num_anchors, 4)
}
init_op = tf.global_variables_initializer()
with self.test_session(graph=test_graph) as sess:
sess.run(init_op)
prediction_out = sess.run(prediction_dict,
feed_dict={
preprocessed_inputs:
np.zeros(input_image_shape)
})
self.assertEqual(set(prediction_out.keys()), expected_output_keys)
if use_static_shapes:
results = self.execute(graph_fn, [images])
else:
results = self.execute_cpu(graph_fn, [images])
self.assertAllEqual(prediction_out['image_shape'], input_image_shape)
for output_key, expected_shape in expected_output_shapes.items():
self.assertAllEqual(prediction_out[output_key].shape, expected_shape)
self.assertAllEqual(results[0].shape,
expected_output_shapes['rpn_box_predictor_features'])
self.assertAllEqual(results[1].shape,
expected_output_shapes['rpn_features_to_crop'])
self.assertAllEqual(results[2],
input_image_shape)
self.assertAllEqual(results[3].shape,
expected_output_shapes['rpn_box_encodings'])
self.assertAllEqual(
results[4].shape,
expected_output_shapes['rpn_objectness_predictions_with_background'])
self.assertAllEqual(results[5].shape,
expected_output_shapes['anchors'])
# Check that anchors are clipped to window.
anchors = prediction_out['anchors']
self.assertTrue(np.all(np.greater_equal(anchors, 0)))
self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
self.assertTrue(np.all(np.less_equal(anchors[:, 1], width)))
self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))
# Check that anchors are clipped to window.
anchors = results[5]
self.assertTrue(np.all(np.greater_equal(anchors, 0)))
self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
self.assertTrue(np.all(np.less_equal(anchors[:, 1], width)))
self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))
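For readers tracking the shape bookkeeping in the test above, the expected anchor count is just the feature-map grid size times the 3x3 anchors per location noted in the comment:

# Worked arithmetic for the test's expected_num_anchors.
height, width = 10, 12
expected_num_anchors = height * width * 3 * 3  # = 1080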
def test_predict_gives_valid_anchors_in_training_mode_first_stage_only(self):
test_graph = tf.Graph()
......@@ -446,7 +457,38 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
prediction_out['rpn_objectness_predictions_with_background'].shape,
(batch_size, num_anchors_out, 2))
def test_predict_correct_shapes_in_inference_mode_two_stages(self):
def test_predict_correct_shapes_in_inference_mode_two_stages(
self, use_static_shapes=False):
def compare_results(results, expected_output_shapes):
"""Checks if the shape of the predictions are as expected."""
self.assertAllEqual(results[0].shape,
expected_output_shapes['rpn_box_predictor_features'])
self.assertAllEqual(results[1].shape,
expected_output_shapes['rpn_features_to_crop'])
self.assertAllEqual(results[2].shape,
expected_output_shapes['image_shape'])
self.assertAllEqual(results[3].shape,
expected_output_shapes['rpn_box_encodings'])
self.assertAllEqual(
results[4].shape,
expected_output_shapes['rpn_objectness_predictions_with_background'])
self.assertAllEqual(results[5].shape,
expected_output_shapes['anchors'])
self.assertAllEqual(results[6].shape,
expected_output_shapes['refined_box_encodings'])
self.assertAllEqual(
results[7].shape,
expected_output_shapes['class_predictions_with_background'])
self.assertAllEqual(results[8].shape,
expected_output_shapes['num_proposals'])
self.assertAllEqual(results[9].shape,
expected_output_shapes['proposal_boxes'])
self.assertAllEqual(results[10].shape,
expected_output_shapes['proposal_boxes_normalized'])
self.assertAllEqual(results[11].shape,
expected_output_shapes['box_classifier_features'])
batch_size = 2
image_size = 10
max_num_proposals = 8
......@@ -457,6 +499,32 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
(None, image_size, image_size, 3),
(batch_size, None, None, 3),
(None, None, None, 3)]
def graph_fn_tpu(images):
"""Function to construct tf graph for the test."""
model = self._build_model(
is_training=False,
number_of_stages=2,
second_stage_batch_size=2,
predict_masks=False,
use_matmul_crop_and_resize=use_static_shapes,
clip_anchors_to_image=use_static_shapes,
use_static_shapes=use_static_shapes)
preprocessed_inputs, true_image_shapes = model.preprocess(images)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
return (prediction_dict['rpn_box_predictor_features'],
prediction_dict['rpn_features_to_crop'],
prediction_dict['image_shape'],
prediction_dict['rpn_box_encodings'],
prediction_dict['rpn_objectness_predictions_with_background'],
prediction_dict['anchors'],
prediction_dict['refined_box_encodings'],
prediction_dict['class_predictions_with_background'],
prediction_dict['num_proposals'],
prediction_dict['proposal_boxes'],
prediction_dict['proposal_boxes_normalized'],
prediction_dict['box_classifier_features'])
expected_num_anchors = image_size * image_size * 3 * 3
expected_shapes = {
'rpn_box_predictor_features':
......@@ -481,28 +549,34 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
3)
}
for input_shape in input_shapes:
test_graph = tf.Graph()
with test_graph.as_default():
model = self._build_model(
is_training=False,
number_of_stages=2,
second_stage_batch_size=2,
predict_masks=False)
preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape)
_, true_image_shapes = model.preprocess(preprocessed_inputs)
result_tensor_dict = model.predict(
preprocessed_inputs, true_image_shapes)
init_op = tf.global_variables_initializer()
with self.test_session(graph=test_graph) as sess:
sess.run(init_op)
tensor_dict_out = sess.run(result_tensor_dict, feed_dict={
preprocessed_inputs:
np.zeros((batch_size, image_size, image_size, 3))})
self.assertEqual(set(tensor_dict_out.keys()),
set(expected_shapes.keys()))
for key in expected_shapes:
self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
if use_static_shapes:
input_shape = (batch_size, image_size, image_size, 3)
images = np.zeros(input_shape, dtype=np.float32)
results = self.execute(graph_fn_tpu, [images])
compare_results(results, expected_shapes)
else:
for input_shape in input_shapes:
test_graph = tf.Graph()
with test_graph.as_default():
model = self._build_model(
is_training=False,
number_of_stages=2,
second_stage_batch_size=2,
predict_masks=False)
preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape)
_, true_image_shapes = model.preprocess(preprocessed_inputs)
result_tensor_dict = model.predict(
preprocessed_inputs, true_image_shapes)
init_op = tf.global_variables_initializer()
with self.test_session(graph=test_graph) as sess:
sess.run(init_op)
tensor_dict_out = sess.run(result_tensor_dict, feed_dict={
preprocessed_inputs:
np.zeros((batch_size, image_size, image_size, 3))})
self.assertEqual(set(tensor_dict_out.keys()),
set(expected_shapes.keys()))
for key in expected_shapes:
self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
def test_predict_gives_correct_shapes_in_train_mode_both_stages(
self,
......@@ -596,23 +670,46 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
self.assertAllEqual(results[8].shape,
expected_shapes['rpn_box_predictor_features'])
def _test_postprocess_first_stage_only_inference_mode(
self, pad_to_max_dimension=None):
model = self._build_model(
is_training=False, number_of_stages=1, second_stage_batch_size=6,
pad_to_max_dimension=pad_to_max_dimension)
def test_postprocess_first_stage_only_inference_mode(
self, use_static_shapes=False, pad_to_max_dimension=None):
batch_size = 2
anchors = tf.constant(
first_stage_max_proposals = 4 if use_static_shapes else 8
def graph_fn(images,
rpn_box_encodings,
rpn_objectness_predictions_with_background,
rpn_features_to_crop,
anchors):
"""Function to construct tf graph for the test."""
model = self._build_model(
is_training=False, number_of_stages=1, second_stage_batch_size=6,
use_matmul_crop_and_resize=use_static_shapes,
clip_anchors_to_image=use_static_shapes,
use_static_shapes=use_static_shapes,
use_matmul_gather_in_matcher=use_static_shapes,
first_stage_max_proposals=first_stage_max_proposals,
pad_to_max_dimension=pad_to_max_dimension)
_, true_image_shapes = model.preprocess(images)
proposals = model.postprocess({
'rpn_box_encodings': rpn_box_encodings,
'rpn_objectness_predictions_with_background':
rpn_objectness_predictions_with_background,
'rpn_features_to_crop': rpn_features_to_crop,
'anchors': anchors}, true_image_shapes)
return (proposals['num_detections'],
proposals['detection_boxes'],
proposals['detection_scores'])
anchors = np.array(
[[0, 0, 16, 16],
[0, 16, 16, 32],
[16, 0, 32, 16],
[16, 16, 32, 32]], dtype=tf.float32)
rpn_box_encodings = tf.zeros(
[batch_size, anchors.get_shape().as_list()[0],
BOX_CODE_SIZE], dtype=tf.float32)
[16, 16, 32, 32]], dtype=np.float32)
rpn_box_encodings = np.zeros(
(batch_size, anchors.shape[0], BOX_CODE_SIZE), dtype=np.float32)
# use different numbers for the objectness category to break ties in
# order of boxes returned by NMS
rpn_objectness_predictions_with_background = tf.constant([
rpn_objectness_predictions_with_background = np.array([
[[-10, 13],
[10, -10],
[10, -11],
......@@ -620,16 +717,22 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
[[10, -10],
[-10, 13],
[-10, 12],
[10, -11]]], dtype=tf.float32)
rpn_features_to_crop = tf.ones((batch_size, 8, 8, 10), dtype=tf.float32)
image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
_, true_image_shapes = model.preprocess(tf.zeros(image_shape))
proposals = model.postprocess({
'rpn_box_encodings': rpn_box_encodings,
'rpn_objectness_predictions_with_background':
rpn_objectness_predictions_with_background,
'rpn_features_to_crop': rpn_features_to_crop,
'anchors': anchors}, true_image_shapes)
[10, -11]]], dtype=np.float32)
rpn_features_to_crop = np.ones((batch_size, 8, 8, 10), dtype=np.float32)
image_shape = (batch_size, 32, 32, 3)
images = np.zeros(image_shape, dtype=np.float32)
if use_static_shapes:
results = self.execute(graph_fn,
[images, rpn_box_encodings,
rpn_objectness_predictions_with_background,
rpn_features_to_crop, anchors])
else:
results = self.execute_cpu(graph_fn,
[images, rpn_box_encodings,
rpn_objectness_predictions_with_background,
rpn_features_to_crop, anchors])
expected_proposal_boxes = [
[[0, 0, .5, .5], [.5, .5, 1, 1], [0, .5, .5, 1], [.5, 0, 1.0, .5]]
+ 4 * [4 * [0]],
......@@ -639,24 +742,12 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
[1, 1, 0, 0, 0, 0, 0, 0]]
expected_num_proposals = [4, 4]
expected_output_keys = set(['detection_boxes', 'detection_scores',
'num_detections'])
self.assertEqual(set(proposals.keys()), expected_output_keys)
with self.test_session() as sess:
proposals_out = sess.run(proposals)
self.assertAllClose(proposals_out['detection_boxes'],
expected_proposal_boxes)
self.assertAllClose(proposals_out['detection_scores'],
expected_proposal_scores)
self.assertAllEqual(proposals_out['num_detections'],
expected_num_proposals)
def test_postprocess_first_stage_only_inference_mode(self):
self._test_postprocess_first_stage_only_inference_mode()
def test_postprocess_first_stage_only_inference_mode_padded_image(self):
self._test_postprocess_first_stage_only_inference_mode(
pad_to_max_dimension=56)
self.assertAllClose(results[0], expected_num_proposals)
for indx, num_proposals in enumerate(expected_num_proposals):
self.assertAllClose(results[1][indx][0:num_proposals],
expected_proposal_boxes[indx][0:num_proposals])
self.assertAllClose(results[2][indx][0:num_proposals],
expected_proposal_scores[indx][0:num_proposals])
def _test_postprocess_first_stage_only_train_mode(self,
pad_to_max_dimension=None):
......@@ -733,83 +824,80 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
def test_postprocess_first_stage_only_train_mode_padded_image(self):
self._test_postprocess_first_stage_only_train_mode(pad_to_max_dimension=56)
def _test_postprocess_second_stage_only_inference_mode(
self, pad_to_max_dimension=None):
num_proposals_shapes = [(2), (None,)]
refined_box_encodings_shapes = [(16, 2, 4), (None, 2, 4)]
class_predictions_with_background_shapes = [(16, 3), (None, 3)]
proposal_boxes_shapes = [(2, 8, 4), (None, 8, 4)]
def test_postprocess_second_stage_only_inference_mode(
self, use_static_shapes=False, pad_to_max_dimension=None):
batch_size = 2
num_classes = 2
image_shape = np.array((2, 36, 48, 3), dtype=np.int32)
for (num_proposals_shape, refined_box_encoding_shape,
class_predictions_with_background_shape,
proposal_boxes_shape) in zip(num_proposals_shapes,
refined_box_encodings_shapes,
class_predictions_with_background_shapes,
proposal_boxes_shapes):
tf_graph = tf.Graph()
with tf_graph.as_default():
model = self._build_model(
is_training=False, number_of_stages=2,
second_stage_batch_size=6,
pad_to_max_dimension=pad_to_max_dimension)
_, true_image_shapes = model.preprocess(tf.zeros(image_shape))
total_num_padded_proposals = batch_size * model.max_num_proposals
proposal_boxes = np.array(
[[[1, 1, 2, 3],
[0, 0, 1, 1],
[.5, .5, .6, .6],
4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
[[2, 3, 6, 8],
[1, 2, 5, 3],
4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]])
num_proposals = np.array([3, 2], dtype=np.int32)
refined_box_encodings = np.zeros(
[total_num_padded_proposals, model.num_classes, 4])
class_predictions_with_background = np.ones(
[total_num_padded_proposals, model.num_classes+1])
num_proposals_placeholder = tf.placeholder(tf.int32,
shape=num_proposals_shape)
refined_box_encodings_placeholder = tf.placeholder(
tf.float32, shape=refined_box_encoding_shape)
class_predictions_with_background_placeholder = tf.placeholder(
tf.float32, shape=class_predictions_with_background_shape)
proposal_boxes_placeholder = tf.placeholder(
tf.float32, shape=proposal_boxes_shape)
image_shape_placeholder = tf.placeholder(tf.int32, shape=(4))
detections = model.postprocess({
'refined_box_encodings': refined_box_encodings_placeholder,
'class_predictions_with_background':
class_predictions_with_background_placeholder,
'num_proposals': num_proposals_placeholder,
'proposal_boxes': proposal_boxes_placeholder,
}, true_image_shapes)
with self.test_session(graph=tf_graph) as sess:
detections_out = sess.run(
detections,
feed_dict={
refined_box_encodings_placeholder: refined_box_encodings,
class_predictions_with_background_placeholder:
class_predictions_with_background,
num_proposals_placeholder: num_proposals,
proposal_boxes_placeholder: proposal_boxes,
image_shape_placeholder: image_shape
})
self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
self.assertAllClose(detections_out['detection_scores'],
[[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]])
self.assertAllClose(detections_out['detection_classes'],
[[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
self.assertAllClose(detections_out['num_detections'], [5, 4])
def test_postprocess_second_stage_only_inference_mode(self):
self._test_postprocess_second_stage_only_inference_mode()
def test_postprocess_second_stage_only_inference_mode_padded_image(self):
self._test_postprocess_second_stage_only_inference_mode(
pad_to_max_dimension=56)
first_stage_max_proposals = 8
total_num_padded_proposals = batch_size * first_stage_max_proposals
def graph_fn(images,
refined_box_encodings,
class_predictions_with_background,
num_proposals,
proposal_boxes):
"""Function to construct tf graph for the test."""
model = self._build_model(
is_training=False, number_of_stages=2,
second_stage_batch_size=6,
use_matmul_crop_and_resize=use_static_shapes,
clip_anchors_to_image=use_static_shapes,
use_static_shapes=use_static_shapes,
use_matmul_gather_in_matcher=use_static_shapes,
pad_to_max_dimension=pad_to_max_dimension)
_, true_image_shapes = model.preprocess(images)
detections = model.postprocess({
'refined_box_encodings': refined_box_encodings,
'class_predictions_with_background':
class_predictions_with_background,
'num_proposals': num_proposals,
'proposal_boxes': proposal_boxes,
}, true_image_shapes)
return (detections['num_detections'],
detections['detection_boxes'],
detections['detection_scores'],
detections['detection_classes'])
proposal_boxes = np.array(
[[[1, 1, 2, 3],
[0, 0, 1, 1],
[.5, .5, .6, .6],
4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
[[2, 3, 6, 8],
[1, 2, 5, 3],
4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=np.float32)
num_proposals = np.array([3, 2], dtype=np.int32)
refined_box_encodings = np.zeros(
[total_num_padded_proposals, num_classes, 4], dtype=np.float32)
class_predictions_with_background = np.ones(
[total_num_padded_proposals, num_classes+1], dtype=np.float32)
images = np.zeros(image_shape, dtype=np.float32)
if use_static_shapes:
results = self.execute(graph_fn,
[images, refined_box_encodings,
class_predictions_with_background,
num_proposals, proposal_boxes])
else:
results = self.execute_cpu(graph_fn,
[images, refined_box_encodings,
class_predictions_with_background,
num_proposals, proposal_boxes])
expected_num_detections = [5, 4]
expected_detection_classes = [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]]
expected_detection_scores = [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]]
self.assertAllClose(results[0], expected_num_detections)
for indx, num_proposals in enumerate(expected_num_detections):
self.assertAllClose(results[2][indx][0:num_proposals],
expected_detection_scores[indx][0:num_proposals])
self.assertAllClose(results[3][indx][0:num_proposals],
expected_detection_classes[indx][0:num_proposals])
if not use_static_shapes:
self.assertAllEqual(results[1].shape, [2, 5, 4])
def test_preprocess_preserves_input_shapes(self):
image_shapes = [(3, None, None, 3),
......
......@@ -19,7 +19,6 @@ models.
"""
from abc import abstractmethod
import re
import tensorflow as tf
from object_detection.core import box_list
......@@ -116,6 +115,25 @@ class SSDFeatureExtractor(object):
"""
raise NotImplementedError
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in tf.global_variables():
var_name = variable.op.name
if var_name.startswith(feature_extractor_scope + '/'):
var_name = var_name.replace(feature_extractor_scope + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
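A hedged usage sketch for the new helper (names like sess and checkpoint_path are assumptions, not from the diff): the returned name-to-variable map can be handed directly to a Saver to pull feature-extractor weights out of a classification checkpoint.

# Hedged sketch: restore feature extractor weights from a classification
# checkpoint using the variable map returned above.
var_map = feature_extractor.restore_from_classification_checkpoint_fn(
    feature_extractor_scope='FeatureExtractor')
saver = tf.train.Saver(var_list=var_map)
saver.restore(sess, checkpoint_path)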
class SSDKerasFeatureExtractor(tf.keras.Model):
"""SSD Feature Extractor definition."""
......@@ -218,6 +236,25 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
def call(self, inputs, **kwargs):
return self._extract_features(inputs)
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in tf.global_variables():
var_name = variable.op.name
if var_name.startswith(feature_extractor_scope + '/'):
var_name = var_name.replace(feature_extractor_scope + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
class SSDMetaArch(model.DetectionModel):
"""SSD Meta-architecture definition."""
......@@ -333,13 +370,15 @@ class SSDMetaArch(model.DetectionModel):
# Slim feature extractors get an explicit naming scope
self._extract_features_scope = 'FeatureExtractor'
# TODO(jonathanhuang): handle agnostic mode
# weights
self._unmatched_class_label = tf.constant([1] + self.num_classes * [0],
tf.float32)
if encode_background_as_zeros:
if self._add_background_class and encode_background_as_zeros:
self._unmatched_class_label = tf.constant((self.num_classes + 1) * [0],
tf.float32)
elif self._add_background_class:
self._unmatched_class_label = tf.constant([1] + self.num_classes * [0],
tf.float32)
else:
self._unmatched_class_label = tf.constant(self.num_classes * [0],
tf.float32)
self._target_assigner = target_assigner_instance
......@@ -606,14 +645,22 @@ class SSDMetaArch(model.DetectionModel):
detection_boxes = tf.identity(detection_boxes, 'raw_box_locations')
detection_boxes = tf.expand_dims(detection_boxes, axis=2)
detection_scores_with_background = self._score_conversion_fn(
class_predictions)
detection_scores_with_background = tf.identity(
detection_scores_with_background, 'raw_box_scores')
detection_scores = tf.slice(detection_scores_with_background, [0, 0, 1],
[-1, -1, -1])
detection_scores = self._score_conversion_fn(class_predictions)
detection_scores = tf.identity(detection_scores, 'raw_box_scores')
if self._add_background_class:
detection_scores = tf.slice(detection_scores, [0, 0, 1], [-1, -1, -1])
additional_fields = None
batch_size = (
shape_utils.combined_static_and_dynamic_shape(preprocessed_images)[0])
if 'feature_maps' in prediction_dict:
feature_map_list = []
for feature_map in prediction_dict['feature_maps']:
feature_map_list.append(tf.reshape(feature_map, [batch_size, -1]))
box_features = tf.concat(feature_map_list, 1)
box_features = tf.identity(box_features, 'raw_box_features')
if detection_keypoints is not None:
additional_fields = {
fields.BoxListFields.keypoints: detection_keypoints}
......@@ -683,17 +730,20 @@ class SSDMetaArch(model.DetectionModel):
self.groundtruth_lists(fields.BoxListFields.boxes), match_list)
if self._random_example_sampler:
batch_cls_per_anchor_weights = tf.reduce_mean(
batch_cls_weights, axis=-1)
batch_sampled_indicator = tf.to_float(
shape_utils.static_or_dynamic_map_fn(
self._minibatch_subsample_fn,
[batch_cls_targets, batch_cls_weights],
[batch_cls_targets, batch_cls_per_anchor_weights],
dtype=tf.bool,
parallel_iterations=self._parallel_iterations,
back_prop=True))
batch_reg_weights = tf.multiply(batch_sampled_indicator,
batch_reg_weights)
batch_cls_weights = tf.multiply(batch_sampled_indicator,
batch_cls_weights)
batch_cls_weights = tf.multiply(
tf.expand_dims(batch_sampled_indicator, -1),
batch_cls_weights)
losses_mask = None
if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
......@@ -713,16 +763,32 @@ class SSDMetaArch(model.DetectionModel):
losses_mask=losses_mask)
if self._expected_classification_loss_under_sampling:
# We need to compute losses for assigned targets against both the
# unmatched_class_label and their assigned targets. The simplest
# (but wasteful) approach is just to calculate all losses twice.
batch_size, num_anchors, num_classes = batch_cls_targets.get_shape()
unmatched_targets = tf.ones([batch_size, num_anchors, 1
]) * self._unmatched_class_label
unmatched_cls_losses = self._classification_loss(
prediction_dict['class_predictions_with_background'],
unmatched_targets,
weights=batch_cls_weights,
losses_mask=losses_mask)
if cls_losses.get_shape().ndims == 3:
batch_size, num_anchors, num_classes = cls_losses.get_shape()
cls_losses = tf.reshape(cls_losses, [batch_size, -1])
unmatched_cls_losses = tf.reshape(unmatched_cls_losses,
[batch_size, -1])
batch_cls_targets = tf.reshape(
batch_cls_targets, [batch_size, num_anchors * num_classes, -1])
batch_cls_targets = tf.concat(
[1 - batch_cls_targets, batch_cls_targets], axis=-1)
cls_losses = self._expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses)
batch_cls_targets, cls_losses, unmatched_cls_losses)
classification_loss = tf.reduce_sum(cls_losses)
localization_loss = tf.reduce_sum(location_losses)
......@@ -971,6 +1037,26 @@ class SSDMetaArch(model.DetectionModel):
[combined_shape[0], combined_shape[1], 4]))
return decoded_boxes, decoded_keypoints
def regularization_losses(self):
"""Returns a list of regularization losses for this model.
Returns a list of regularization losses for this model that the estimator
needs to use during training/optimization.
Returns:
A list of regularization loss tensors.
"""
losses = []
slim_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
# Copy the slim losses to avoid modifying the collection
if slim_losses:
losses.extend(slim_losses)
if self._box_predictor.is_keras_model:
losses.extend(self._box_predictor.losses)
if self._feature_extractor.is_keras_model:
losses.extend(self._feature_extractor.losses)
return losses
def restore_map(self,
fine_tune_checkpoint_type='detection',
load_all_detection_checkpoint_vars=False):
......@@ -997,18 +1083,44 @@ class SSDMetaArch(model.DetectionModel):
if fine_tune_checkpoint_type not in ['detection', 'classification']:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
variables_to_restore = {}
for variable in tf.global_variables():
var_name = variable.op.name
if (fine_tune_checkpoint_type == 'detection' and
load_all_detection_checkpoint_vars):
variables_to_restore[var_name] = variable
else:
if var_name.startswith(self._extract_features_scope):
if fine_tune_checkpoint_type == 'classification':
var_name = (
re.split('^' + self._extract_features_scope + '/',
var_name)[-1])
if fine_tune_checkpoint_type == 'classification':
return self._feature_extractor.restore_from_classification_checkpoint_fn(
self._extract_features_scope)
if fine_tune_checkpoint_type == 'detection':
variables_to_restore = {}
for variable in tf.global_variables():
var_name = variable.op.name
if load_all_detection_checkpoint_vars:
variables_to_restore[var_name] = variable
else:
if var_name.startswith(self._extract_features_scope):
variables_to_restore[var_name] = variable
return variables_to_restore
def updates(self):
"""Returns a list of update operators for this model.
Returns a list of update operators for this model that must be executed at
each training step. The estimator's train op needs to have a control
dependency on these updates.
Returns:
A list of update operators.
"""
update_ops = []
slim_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# Copy the slim ops to avoid modifying the collection
if slim_update_ops:
update_ops.extend(slim_update_ops)
if self._box_predictor.is_keras_model:
update_ops.extend(self._box_predictor.get_updates_for(None))
update_ops.extend(self._box_predictor.get_updates_for(
self._box_predictor.inputs))
if self._feature_extractor.is_keras_model:
update_ops.extend(self._feature_extractor.get_updates_for(None))
update_ops.extend(self._feature_extractor.get_updates_for(
self._feature_extractor.inputs))
return update_ops
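A brief aside on the Keras calls above (hedged, not from the diff): get_updates_for(None) returns a layer's unconditional updates, while get_updates_for(inputs) returns updates created conditionally on those inputs, e.g. batch-norm moving-average ops recorded during the forward pass. A minimal sketch:

# Hedged sketch of the TF 1.x Keras update API used above.
inputs = tf.placeholder(tf.float32, shape=[None, 8])
bn = tf.keras.layers.BatchNormalization()
outputs = bn(inputs, training=True)
unconditional_updates = bn.get_updates_for(None)  # updates not tied to inputs
conditional_updates = bn.get_updates_for(inputs)  # updates created by the call above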
......@@ -42,7 +42,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
random_example_sampling=False,
weight_regression_loss_by_score=False,
use_expected_classification_loss_under_sampling=False,
minimum_negative_sampling=1,
min_num_negative_samples=1,
desired_negative_sampling_ratio=3,
use_keras=False,
predict_mask=False,
......@@ -57,7 +57,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
weight_regression_loss_by_score=weight_regression_loss_by_score,
use_expected_classification_loss_under_sampling=
use_expected_classification_loss_under_sampling,
minimum_negative_sampling=minimum_negative_sampling,
min_num_negative_samples=min_num_negative_samples,
desired_negative_sampling_ratio=desired_negative_sampling_ratio,
use_keras=use_keras,
predict_mask=predict_mask,
......@@ -344,11 +344,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32)
groundtruth_classes2 = np.array([[0, 1]], dtype=np.float32)
groundtruth_classes1 = np.array([[1]], dtype=np.float32)
groundtruth_classes2 = np.array([[1]], dtype=np.float32)
expected_localization_loss = 0.0
expected_classification_loss = (
batch_size * num_anchors * (num_classes + 1) * np.log(2.0))
batch_size * num_anchors * num_classes * np.log(2.0))
(localization_loss, classification_loss) = self.execute(
graph_fn, [
preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
......@@ -371,7 +371,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
apply_hard_mining=False,
add_background_class=True,
use_expected_classification_loss_under_sampling=True,
minimum_negative_sampling=1,
min_num_negative_samples=1,
desired_negative_sampling_ratio=desired_negative_sampling_ratio)
model.provide_groundtruth(groundtruth_boxes_list,
groundtruth_classes_list)
......@@ -391,8 +391,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
expected_localization_loss = 0.0
expected_classification_loss = (
batch_size * (desired_negative_sampling_ratio * num_anchors +
num_classes * num_anchors) * np.log(2.0))
batch_size * (num_anchors + num_classes * num_anchors) * np.log(2.0))
(localization_loss, classification_loss) = self.execute(
graph_fn, [
preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
......@@ -432,11 +431,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
groundtruth_boxes1 = np.array([[0, 0, 1, 1]], dtype=np.float32)
groundtruth_boxes2 = np.array([[0, 0, 1, 1]], dtype=np.float32)
groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32)
groundtruth_classes2 = np.array([[1, 0]], dtype=np.float32)
groundtruth_classes1 = np.array([[1]], dtype=np.float32)
groundtruth_classes2 = np.array([[0]], dtype=np.float32)
expected_localization_loss = 0.25
expected_classification_loss = (
batch_size * num_anchors * (num_classes + 1) * np.log(2.0))
batch_size * num_anchors * num_classes * np.log(2.0))
(localization_loss, classification_loss) = self.execute(
graph_fn, [
preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
......
......@@ -119,7 +119,7 @@ class SSDMetaArchTestBase(test_case.TestCase):
random_example_sampling=False,
weight_regression_loss_by_score=False,
use_expected_classification_loss_under_sampling=False,
minimum_negative_sampling=1,
min_num_negative_samples=1,
desired_negative_sampling_ratio=3,
use_keras=False,
predict_mask=False,
......@@ -130,10 +130,12 @@ class SSDMetaArchTestBase(test_case.TestCase):
mock_anchor_generator = MockAnchorGenerator2x2()
if use_keras:
mock_box_predictor = test_utils.MockKerasBoxPredictor(
is_training, num_classes, predict_mask=predict_mask)
is_training, num_classes, add_background_class=add_background_class,
predict_mask=predict_mask)
else:
mock_box_predictor = test_utils.MockBoxPredictor(
is_training, num_classes, predict_mask=predict_mask)
is_training, num_classes, add_background_class=add_background_class,
predict_mask=predict_mask)
mock_box_coder = test_utils.MockBoxCoder()
if use_keras:
fake_feature_extractor = FakeSSDKerasFeatureExtractor()
......@@ -182,7 +184,7 @@ class SSDMetaArchTestBase(test_case.TestCase):
if use_expected_classification_loss_under_sampling:
expected_classification_loss_under_sampling = functools.partial(
ops.expected_classification_loss_under_sampling,
minimum_negative_sampling=minimum_negative_sampling,
min_num_negative_samples=min_num_negative_samples,
desired_negative_sampling_ratio=desired_negative_sampling_ratio)
code_size = 4
......
......@@ -248,27 +248,30 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_boxes_batched,
detection_scores_batched,
detection_classes_batched,
num_det_boxes_per_image):
num_det_boxes_per_image,
is_annotated_batched):
"""Update operation for adding batch of images to Coco evaluator."""
for (image_id, gt_box, gt_class, gt_is_crowd, num_gt_box, det_box,
det_score, det_class, num_det_box) in zip(
det_score, det_class, num_det_box, is_annotated) in zip(
image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, groundtruth_is_crowd_batched,
num_gt_boxes_per_image,
detection_boxes_batched, detection_scores_batched,
detection_classes_batched, num_det_boxes_per_image):
self.add_single_ground_truth_image_info(
image_id, {
'groundtruth_boxes': gt_box[:num_gt_box],
'groundtruth_classes': gt_class[:num_gt_box],
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
})
self.add_single_detected_image_info(
image_id,
{'detection_boxes': det_box[:num_det_box],
'detection_scores': det_score[:num_det_box],
'detection_classes': det_class[:num_det_box]})
detection_classes_batched, num_det_boxes_per_image,
is_annotated_batched):
if is_annotated:
self.add_single_ground_truth_image_info(
image_id, {
'groundtruth_boxes': gt_box[:num_gt_box],
'groundtruth_classes': gt_class[:num_gt_box],
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
})
self.add_single_detected_image_info(
image_id,
{'detection_boxes': det_box[:num_det_box],
'detection_scores': det_score[:num_det_box],
'detection_classes': det_class[:num_det_box]})
# Unpack items from the evaluation dictionary.
input_data_fields = standard_fields.InputDataFields
......@@ -284,6 +287,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
num_gt_boxes_per_image = eval_dict.get(
'num_groundtruth_boxes_per_image', None)
num_det_boxes_per_image = eval_dict.get('num_det_boxes_per_image', None)
is_annotated = eval_dict.get('is_annotated', None)
if groundtruth_is_crowd is None:
groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
......@@ -306,6 +310,11 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
num_det_boxes_per_image = tf.shape(detection_boxes)[1:2]
else:
num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0)
if is_annotated is None:
is_annotated = tf.constant([True])
else:
is_annotated = tf.expand_dims(is_annotated, 0)
else:
if num_gt_boxes_per_image is None:
num_gt_boxes_per_image = tf.tile(
......@@ -315,6 +324,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
num_det_boxes_per_image = tf.tile(
tf.shape(detection_boxes)[1:2],
multiples=tf.shape(detection_boxes)[0:1])
if is_annotated is None:
is_annotated = tf.ones_like(image_id, dtype=tf.bool)
update_op = tf.py_func(update_op, [image_id,
groundtruth_boxes,
......@@ -324,7 +335,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_boxes,
detection_scores,
detection_classes,
num_det_boxes_per_image], [])
num_det_boxes_per_image,
is_annotated], [])
metric_names = ['DetectionBoxes_Precision/mAP',
'DetectionBoxes_Precision/mAP@.50IOU',
'DetectionBoxes_Precision/mAP@.75IOU',
......@@ -581,8 +593,11 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
Args:
eval_dict: A dictionary that holds tensors for evaluating object detection
performance. This dictionary may be produced from
eval_util.result_dict_for_single_example().
performance. For single-image evaluation, this dictionary may be
produced from eval_util.result_dict_for_single_example(). For multi-image
evaluation, `eval_dict` should contain the fields
'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' to
properly unpad the tensors from the batch.
Returns:
a dictionary of metric names to tuple of value_op and update_op that can
......@@ -590,27 +605,41 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
update ops must be run together and similarly all value ops must be run
together to guarantee correct behaviour.
"""
def update_op(
image_id,
groundtruth_boxes,
groundtruth_classes,
groundtruth_instance_masks,
groundtruth_is_crowd,
detection_scores,
detection_classes,
detection_masks):
def update_op(image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched,
groundtruth_instance_masks_batched,
groundtruth_is_crowd_batched, num_gt_boxes_per_image,
detection_scores_batched, detection_classes_batched,
detection_masks_batched, num_det_boxes_per_image):
"""Update op for metrics."""
self.add_single_ground_truth_image_info(
image_id,
{'groundtruth_boxes': groundtruth_boxes,
'groundtruth_classes': groundtruth_classes,
'groundtruth_instance_masks': groundtruth_instance_masks,
'groundtruth_is_crowd': groundtruth_is_crowd})
self.add_single_detected_image_info(
image_id,
{'detection_scores': detection_scores,
'detection_classes': detection_classes,
'detection_masks': detection_masks})
for (image_id, groundtruth_boxes, groundtruth_classes,
groundtruth_instance_masks, groundtruth_is_crowd, num_gt_box,
detection_scores, detection_classes,
detection_masks, num_det_box) in zip(
image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, groundtruth_instance_masks_batched,
groundtruth_is_crowd_batched, num_gt_boxes_per_image,
detection_scores_batched, detection_classes_batched,
detection_masks_batched, num_det_boxes_per_image):
self.add_single_ground_truth_image_info(
image_id, {
'groundtruth_boxes':
groundtruth_boxes[:num_gt_box],
'groundtruth_classes':
groundtruth_classes[:num_gt_box],
'groundtruth_instance_masks':
groundtruth_instance_masks[:num_gt_box],
'groundtruth_is_crowd':
groundtruth_is_crowd[:num_gt_box]
})
self.add_single_detected_image_info(
image_id, {
'detection_scores': detection_scores[:num_det_box],
'detection_classes': detection_classes[:num_det_box],
'detection_masks': detection_masks[:num_det_box]
})
# Unpack items from the evaluation dictionary.
input_data_fields = standard_fields.InputDataFields
......@@ -622,20 +651,54 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
input_data_fields.groundtruth_instance_masks]
groundtruth_is_crowd = eval_dict.get(
input_data_fields.groundtruth_is_crowd, None)
num_gt_boxes_per_image = eval_dict.get(
input_data_fields.num_groundtruth_boxes, None)
detection_scores = eval_dict[detection_fields.detection_scores]
detection_classes = eval_dict[detection_fields.detection_classes]
detection_masks = eval_dict[detection_fields.detection_masks]
num_det_boxes_per_image = eval_dict.get(detection_fields.num_detections,
None)
if groundtruth_is_crowd is None:
groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
update_op = tf.py_func(update_op, [image_id,
groundtruth_boxes,
groundtruth_classes,
groundtruth_instance_masks,
groundtruth_is_crowd,
detection_scores,
detection_classes,
detection_masks], [])
if not image_id.shape.as_list():
# Apply a batch dimension to all tensors.
image_id = tf.expand_dims(image_id, 0)
groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0)
groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
groundtruth_instance_masks = tf.expand_dims(groundtruth_instance_masks, 0)
groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
detection_scores = tf.expand_dims(detection_scores, 0)
detection_classes = tf.expand_dims(detection_classes, 0)
detection_masks = tf.expand_dims(detection_masks, 0)
if num_gt_boxes_per_image is None:
num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2]
else:
num_gt_boxes_per_image = tf.expand_dims(num_gt_boxes_per_image, 0)
if num_det_boxes_per_image is None:
num_det_boxes_per_image = tf.shape(detection_scores)[1:2]
else:
num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0)
else:
if num_gt_boxes_per_image is None:
num_gt_boxes_per_image = tf.tile(
tf.shape(groundtruth_boxes)[1:2],
multiples=tf.shape(groundtruth_boxes)[0:1])
if num_det_boxes_per_image is None:
num_det_boxes_per_image = tf.tile(
tf.shape(detection_scores)[1:2],
multiples=tf.shape(detection_scores)[0:1])
update_op = tf.py_func(update_op, [
image_id, groundtruth_boxes, groundtruth_classes,
groundtruth_instance_masks, groundtruth_is_crowd,
num_gt_boxes_per_image, detection_scores, detection_classes,
detection_masks, num_det_boxes_per_image
], [])
metric_names = ['DetectionMasks_Precision/mAP',
'DetectionMasks_Precision/mAP@.50IOU',
'DetectionMasks_Precision/mAP@.75IOU',
......
......@@ -308,6 +308,99 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsIsAnnotated(self):
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
image_id = tf.placeholder(tf.string, shape=())
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
is_annotated = tf.placeholder(tf.bool, shape=())
detection_boxes = tf.placeholder(tf.float32, shape=(None, 4))
detection_scores = tf.placeholder(tf.float32, shape=(None))
detection_classes = tf.placeholder(tf.float32, shape=(None))
input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields
eval_dict = {
input_data_fields.key: image_id,
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
'is_annotated': is_annotated,
detection_fields.detection_boxes: detection_boxes,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes
}
eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict)
_, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
with self.test_session() as sess:
sess.run(update_op,
feed_dict={
image_id: 'image1',
groundtruth_boxes: np.array([[100., 100., 200., 200.]]),
groundtruth_classes: np.array([1]),
is_annotated: True,
detection_boxes: np.array([[100., 100., 200., 200.]]),
detection_scores: np.array([.8]),
detection_classes: np.array([1])
})
sess.run(update_op,
feed_dict={
image_id: 'image2',
groundtruth_boxes: np.array([[50., 50., 100., 100.]]),
groundtruth_classes: np.array([3]),
is_annotated: True,
detection_boxes: np.array([[50., 50., 100., 100.]]),
detection_scores: np.array([.7]),
detection_classes: np.array([3])
})
sess.run(update_op,
feed_dict={
image_id: 'image3',
groundtruth_boxes: np.array([[25., 25., 50., 50.]]),
groundtruth_classes: np.array([2]),
is_annotated: True,
detection_boxes: np.array([[25., 25., 50., 50.]]),
detection_scores: np.array([.9]),
detection_classes: np.array([2])
})
sess.run(update_op,
feed_dict={
image_id: 'image4',
groundtruth_boxes: np.zeros((0, 4)),
groundtruth_classes: np.zeros((0)),
is_annotated: False, # Note that this image isn't annotated.
detection_boxes: np.array([[25., 25., 50., 50.],
[25., 25., 70., 50.],
[25., 25., 80., 50.],
[25., 25., 90., 50.]]),
detection_scores: np.array([0.6, 0.7, 0.8, 0.9]),
detection_classes: np.array([1, 2, 2, 3])
})
metrics = {}
for key, (value_op, _) in eval_metric_ops.iteritems():
metrics[key] = value_op
metrics = sess.run(metrics)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self):
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
......@@ -665,22 +758,40 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
_, update_op = eval_metric_ops['DetectionMasks_Precision/mAP']
with self.test_session() as sess:
sess.run(update_op,
feed_dict={
image_id: 'image1',
groundtruth_boxes: np.array([[100., 100., 200., 200.]]),
groundtruth_classes: np.array([1]),
groundtruth_masks: np.pad(np.ones([1, 100, 100],
dtype=np.uint8),
((0, 0), (10, 10), (10, 10)),
mode='constant'),
detection_scores: np.array([.8]),
detection_classes: np.array([1]),
detection_masks: np.pad(np.ones([1, 100, 100],
dtype=np.uint8),
((0, 0), (10, 10), (10, 10)),
mode='constant')
})
sess.run(
update_op,
feed_dict={
image_id:
'image1',
groundtruth_boxes:
np.array([[100., 100., 200., 200.], [50., 50., 100., 100.]]),
groundtruth_classes:
np.array([1, 2]),
groundtruth_masks:
np.stack([
np.pad(
np.ones([100, 100], dtype=np.uint8), ((10, 10),
(10, 10)),
mode='constant'),
np.pad(
np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant')
]),
detection_scores:
np.array([.9, .8]),
detection_classes:
np.array([2, 1]),
detection_masks:
np.stack([
np.pad(
np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant'),
np.pad(
np.ones([100, 100], dtype=np.uint8), ((10, 10),
(10, 10)),
mode='constant'),
])
})
sess.run(update_op,
feed_dict={
image_id: 'image2',
......@@ -735,6 +846,106 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._image_id_to_mask_shape_map)
self.assertFalse(coco_evaluator._detection_masks_list)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
coco_evaluator = coco_evaluation.CocoMaskEvaluator(_get_categories_list())
batch_size = 3
image_id = tf.placeholder(tf.string, shape=(batch_size))
groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
groundtruth_masks = tf.placeholder(
tf.uint8, shape=(batch_size, None, None, None))
detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None))
detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
detection_masks = tf.placeholder(
tf.uint8, shape=(batch_size, None, None, None))
input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields
eval_dict = {
input_data_fields.key: image_id,
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks,
}
eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict)
_, update_op = eval_metric_ops['DetectionMasks_Precision/mAP']
with self.test_session() as sess:
sess.run(
update_op,
feed_dict={
image_id: ['image1', 'image2', 'image3'],
groundtruth_boxes:
np.array([[[100., 100., 200., 200.]],
[[50., 50., 100., 100.]],
[[25., 25., 50., 50.]]]),
groundtruth_classes:
np.array([[1], [1], [1]]),
groundtruth_masks:
np.stack([
np.pad(
np.ones([1, 100, 100], dtype=np.uint8),
((0, 0), (0, 0), (0, 0)),
mode='constant'),
np.pad(
np.ones([1, 50, 50], dtype=np.uint8),
((0, 0), (25, 25), (25, 25)),
mode='constant'),
np.pad(
np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (37, 38), (37, 38)),
mode='constant')
],
axis=0),
detection_scores:
np.array([[.8], [.8], [.8]]),
detection_classes:
np.array([[1], [1], [1]]),
detection_masks:
np.stack([
np.pad(
np.ones([1, 100, 100], dtype=np.uint8),
((0, 0), (0, 0), (0, 0)),
mode='constant'),
np.pad(
np.ones([1, 50, 50], dtype=np.uint8),
((0, 0), (25, 25), (25, 25)),
mode='constant'),
np.pad(
np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (37, 38), (37, 38)),
mode='constant')
],
axis=0)
})
metrics = {}
for key, (value_op, _) in eval_metric_ops.iteritems():
metrics[key] = value_op
metrics = sess.run(metrics)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.50IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@1'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._image_ids_with_detections)
self.assertFalse(coco_evaluator._image_id_to_mask_shape_map)
self.assertFalse(coco_evaluator._detection_masks_list)
if __name__ == '__main__':
tf.test.main()
......@@ -25,6 +25,7 @@ import os
import tensorflow as tf
from object_detection import eval_util
from object_detection import exporter as exporter_lib
from object_detection import inputs
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
......@@ -306,8 +307,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
prediction_dict, features[fields.InputDataFields.true_image_shape])
losses = [loss_tensor for loss_tensor in losses_dict.values()]
if train_config.add_regularization_loss:
regularization_losses = tf.get_collection(
tf.GraphKeys.REGULARIZATION_LOSSES)
regularization_losses = detection_model.regularization_losses()
if regularization_losses:
regularization_loss = tf.add_n(
regularization_losses, name='regularization_loss')
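One plausible way for a slim-based model to satisfy the new
`detection_model.regularization_losses()` contract is to return the graph
collection that the removed lines read directly. A minimal sketch, assuming
`import tensorflow as tf`, and not necessarily the library's implementation:

def regularization_losses(self):
  """Returns regularization loss tensors collected while building the graph."""
  return tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)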
......@@ -353,20 +353,24 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
for var in optimizer_summary_vars:
tf.summary.scalar(var.op.name, var)
summaries = [] if use_tpu else None
if train_config.summarize_gradients:
summaries = ['gradients', 'gradient_norm', 'global_gradient_norm']
train_op = tf.contrib.layers.optimize_loss(
loss=total_loss,
global_step=global_step,
learning_rate=None,
clip_gradients=clip_gradients_value,
optimizer=training_optimizer,
update_ops=detection_model.updates(),
variables=trainable_variables,
summaries=summaries,
name='') # Preventing scope prefix on all variables.
if mode == tf.estimator.ModeKeys.PREDICT:
exported_output = exporter_lib.add_output_tensor_nodes(detections)
export_outputs = {
tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
tf.estimator.export.PredictOutput(detections)
tf.estimator.export.PredictOutput(exported_output)
}
eval_metric_ops = None
......@@ -456,6 +460,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
def create_estimator_and_inputs(run_config,
hparams,
pipeline_config_path,
config_override=None,
train_steps=None,
sample_1_of_n_eval_examples=1,
sample_1_of_n_eval_on_train_examples=1,
......@@ -465,6 +470,7 @@ def create_estimator_and_inputs(run_config,
num_shards=1,
params=None,
override_eval_num_epochs=True,
save_final_config=False,
**kwargs):
"""Creates `Estimator`, input functions, and steps.
......@@ -472,6 +478,8 @@ def create_estimator_and_inputs(run_config,
run_config: A `RunConfig`.
hparams: A `HParams`.
pipeline_config_path: A path to a pipeline config file.
config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
override the config from `pipeline_config_path`.
train_steps: Number of training steps. If None, the number of training steps
is set from the `TrainConfig` proto.
sample_1_of_n_eval_examples: Integer representing how often an eval example
......@@ -499,6 +507,8 @@ def create_estimator_and_inputs(run_config,
`use_tpu_estimator` is True.
    override_eval_num_epochs: Whether to override the number of epochs for
      eval_input to 1.
    save_final_config: Whether to save the final config (obtained after
      applying the overrides) to `estimator.model_dir`.
**kwargs: Additional keyword arguments for configuration override.
Returns:
......@@ -522,7 +532,8 @@ def create_estimator_and_inputs(run_config,
create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn']
create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn']
configs = get_configs_from_pipeline_file(pipeline_config_path)
configs = get_configs_from_pipeline_file(pipeline_config_path,
config_override=config_override)
kwargs.update({
'train_steps': train_steps,
'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
......@@ -595,7 +606,7 @@ def create_estimator_and_inputs(run_config,
estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
# Write the as-run pipeline config to disk.
if run_config.is_chief:
if run_config.is_chief and save_final_config:
pipeline_config_final = create_pipeline_proto_from_configs(configs)
config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir)
......@@ -641,11 +652,17 @@ def create_train_and_eval_specs(train_input_fn,
input_fn=train_input_fn, max_steps=train_steps)
if eval_spec_names is None:
eval_spec_names = [ str(i) for i in range(len(eval_input_fns)) ]
eval_spec_names = [str(i) for i in range(len(eval_input_fns))]
eval_specs = []
for eval_spec_name, eval_input_fn in zip(eval_spec_names, eval_input_fns):
exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
for index, (eval_spec_name, eval_input_fn) in enumerate(
zip(eval_spec_names, eval_input_fns)):
# Uses final_exporter_name as exporter_name for the first eval spec for
# backward compatibility.
if index == 0:
exporter_name = final_exporter_name
else:
exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
exporter = tf.estimator.FinalExporter(
name=exporter_name, serving_input_receiver_fn=predict_input_fn)
eval_specs.append(
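With the naming scheme above and, for example, final_exporter_name='exporter'
and eval spec names ['holdout', 'eval_on_train'], the exporters are named
'exporter' for the first spec (preserving backward compatibility) and
'exporter_eval_on_train' for the second, which is what the updated test below
asserts.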
......@@ -747,6 +764,7 @@ def populate_experiment(run_config,
train_steps=train_steps,
eval_steps=eval_steps,
model_fn_creator=model_fn_creator,
save_final_config=True,
**kwargs)
estimator = train_and_eval_dict['estimator']
train_input_fn = train_and_eval_dict['train_input_fn']
......
......@@ -310,7 +310,7 @@ class ModelLibTest(tf.test.TestCase):
self.assertEqual(2, len(eval_specs))
self.assertEqual(None, eval_specs[0].steps)
self.assertEqual('holdout', eval_specs[0].name)
self.assertEqual('exporter_holdout', eval_specs[0].exporters[0].name)
self.assertEqual('exporter', eval_specs[0].exporters[0].name)
self.assertEqual(None, eval_specs[1].steps)
self.assertEqual('eval_on_train', eval_specs[1].name)
......
......@@ -114,6 +114,7 @@ def main(unused_argv):
use_tpu_estimator=True,
use_tpu=FLAGS.use_tpu,
num_shards=FLAGS.num_shards,
save_final_config=FLAGS.mode == 'train',
**kwargs)
estimator = train_and_eval_dict['estimator']
train_input_fn = train_and_eval_dict['train_input_fn']
......
......@@ -72,6 +72,8 @@ class FasterRCNNResnetV1FeatureExtractor(
VGG style channel mean subtraction as described here:
https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
Note that if the number of channels is not equal to 3, the mean subtraction
will be skipped and the original resized_inputs will be returned.
Args:
resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
......@@ -82,8 +84,11 @@ class FasterRCNNResnetV1FeatureExtractor(
tensor representing a batch of images.
"""
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
if resized_inputs.shape.as_list()[3] == 3:
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
else:
return resized_inputs
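  # Usage sketch with illustrative values: a 3-channel input has the VGG means
  # broadcast-subtracted, e.g.
  #   rgb = tf.fill([1, 224, 224, 3], 128.0)
  #   preprocessed = rgb - [[[123.68, 116.779, 103.939]]]
  # while an input with any other channel count is returned unchanged.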
def _extract_proposal_features(self, preprocessed_inputs, scope):
"""Extracts first stage RPN features.
......
......@@ -146,7 +146,6 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
use_depthwise = feature_map_layout['use_depthwise']
for index, from_layer in enumerate(feature_map_layout['from_layer']):
net = []
self.convolutions.append(net)
layer_depth = feature_map_layout['layer_depth'][index]
conv_kernel_size = 3
if 'conv_kernel_size' in feature_map_layout:
......@@ -231,6 +230,10 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
conv_hyperparams.build_activation_layer(
name=layer_name))
      # Until certain bugs are fixed in checkpointable lists, this net must be
      # appended only after it has been filled with layers.
self.convolutions.append(net)
def call(self, image_features):
"""Generate the multi-resolution feature maps.
......@@ -263,7 +266,8 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
min_depth, insert_1x1_conv, image_features):
min_depth, insert_1x1_conv, image_features,
pool_residual=False):
"""Generates multi resolution feature maps from input image features.
Generates multi-scale feature maps for detection as in the SSD papers by
......@@ -317,6 +321,13 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
should be inserted before shrinking the feature map.
image_features: A dictionary of handles to activation tensors from the
base feature extractor.
    pool_residual: Whether to add an average pooling layer followed by a
      residual connection between subsequent feature maps when the channel
      depths match. For example, with option 'layer_depth': [-1, 512, 256, 256],
      a pooling and residual layer is added between the third and fourth
      feature maps. This option is best used with the Weight Shared Convolution
      Box Predictor when all feature maps have the same channel depth, to
      encourage more consistent features across multi-scale feature maps.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
......@@ -350,6 +361,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
feature_map_keys.append(from_layer)
else:
pre_layer = feature_maps[-1]
pre_layer_depth = pre_layer.get_shape().as_list()[3]
intermediate_layer = pre_layer
if insert_1x1_conv:
layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
......@@ -383,6 +395,12 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
padding='SAME',
stride=1,
scope=layer_name)
if pool_residual and pre_layer_depth == depth_fn(layer_depth):
feature_map += slim.avg_pool2d(
pre_layer, [3, 3],
padding='SAME',
stride=2,
scope=layer_name + '_pool')
else:
feature_map = slim.conv2d(
intermediate_layer,
......@@ -399,6 +417,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
def fpn_top_down_feature_maps(image_features,
depth,
use_depthwise=False,
use_explicit_padding=False,
scope=None):
"""Generates `top-down` feature maps for Feature Pyramid Networks.
......@@ -409,7 +428,9 @@ def fpn_top_down_feature_maps(image_features,
      Spatial resolutions of successive tensors must reduce exactly by a factor
of 2.
depth: depth of output feature maps.
use_depthwise: use depthwise separable conv instead of regular conv.
use_depthwise: whether to use depthwise separable conv instead of regular
conv.
use_explicit_padding: whether to use explicit padding.
scope: A scope name to wrap this op under.
Returns:
......@@ -420,8 +441,10 @@ def fpn_top_down_feature_maps(image_features,
num_levels = len(image_features)
output_feature_maps_list = []
output_feature_map_keys = []
padding = 'VALID' if use_explicit_padding else 'SAME'
kernel_size = 3
with slim.arg_scope(
[slim.conv2d, slim.separable_conv2d], padding='SAME', stride=1):
[slim.conv2d, slim.separable_conv2d], padding=padding, stride=1):
top_down = slim.conv2d(
image_features[-1][1],
depth, [1, 1], activation_fn=None, normalizer_fn=None,
......@@ -436,14 +459,20 @@ def fpn_top_down_feature_maps(image_features,
image_features[level][1], depth, [1, 1],
activation_fn=None, normalizer_fn=None,
scope='projection_%d' % (level + 1))
if use_explicit_padding:
        # Slice top_down to the same spatial shape as residual.
residual_shape = tf.shape(residual)
top_down = top_down[:, :residual_shape[1], :residual_shape[2], :]
top_down += residual
if use_depthwise:
conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1)
else:
conv_op = slim.conv2d
if use_explicit_padding:
top_down = ops.fixed_padding(top_down, kernel_size)
output_feature_maps_list.append(conv_op(
top_down,
depth, [3, 3],
depth, [kernel_size, kernel_size],
scope='smoothing_%d' % (level + 1)))
output_feature_map_keys.append('top_down_%s' % image_features[level][0])
return collections.OrderedDict(reversed(
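For readers new to FPN, a simplified, self-contained sketch of the top-down
pathway implemented above; the upsampling op and layer wrappers are stand-ins
chosen for illustration, not necessarily what this module uses internally:

import tensorflow as tf

def fpn_top_down_sketch(image_features, depth):
  """Simplified top-down pass over [(name, tensor), ...] pairs, finest first."""
  # Project the coarsest feature map to the target depth.
  top_down = tf.layers.conv2d(image_features[-1][1], depth, 1, padding='same')
  outputs = [top_down]
  for _, feature in reversed(image_features[:-1]):
    # Upsample the coarser map to the finer resolution and add the lateral
    # 1x1 projection of the finer feature map.
    top_down = tf.image.resize_nearest_neighbor(top_down,
                                                tf.shape(feature)[1:3])
    top_down += tf.layers.conv2d(feature, depth, 1, padding='same')
    # Smooth the merged map with a 3x3 convolution.
    outputs.append(tf.layers.conv2d(top_down, depth, 3, padding='same'))
  return list(reversed(outputs))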
......
......@@ -45,6 +45,11 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
'conv_kernel_size': [-1, -1, 3, 3, 2],
}
SSD_MOBILENET_V1_WEIGHT_SHARED_LAYOUT = {
'from_layer': ['Conv2d_13_pointwise', '', '', ''],
'layer_depth': [-1, 256, 256, 256],
}
@parameterized.parameters(
{'use_keras': False},
......@@ -67,7 +72,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _build_feature_map_generator(self, feature_map_layout, use_keras):
def _build_feature_map_generator(self, feature_map_layout, use_keras,
pool_residual=False):
if use_keras:
return feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=feature_map_layout,
......@@ -86,7 +92,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
image_features=image_features,
pool_residual=pool_residual)
return feature_map_generator
def test_get_expected_feature_map_shapes_with_inception_v2(self, use_keras):
......@@ -209,6 +216,34 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_feature_map_shapes_with_pool_residual_ssd_mobilenet_v1(
self, use_keras):
image_features = {
'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024],
dtype=tf.float32),
}
feature_map_generator = self._build_feature_map_generator(
feature_map_layout=SSD_MOBILENET_V1_WEIGHT_SHARED_LAYOUT,
use_keras=use_keras,
pool_residual=True
)
feature_maps = feature_map_generator(image_features)
expected_feature_map_shapes = {
'Conv2d_13_pointwise': (4, 8, 8, 1024),
'Conv2d_13_pointwise_2_Conv2d_1_3x3_s2_256': (4, 4, 4, 256),
'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_256': (4, 2, 2, 256),
'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 1, 1, 256)}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_variable_names_with_inception_v2(self, use_keras):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
......
......@@ -82,6 +82,8 @@ class _LayersOverride(object):
self._conv_hyperparams = conv_hyperparams
self._use_explicit_padding = use_explicit_padding
self._min_depth = min_depth
self.regularizer = tf.keras.regularizers.l2(0.00004 * 0.5)
self.initializer = tf.truncated_normal_initializer(stddev=0.09)
def _FixedPaddingLayer(self, kernel_size):
return tf.keras.layers.Lambda(lambda x: ops.fixed_padding(x, kernel_size))
......@@ -114,6 +116,9 @@ class _LayersOverride(object):
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
else:
kwargs['kernel_regularizer'] = self.regularizer
kwargs['kernel_initializer'] = self.initializer
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
......@@ -144,6 +149,8 @@ class _LayersOverride(object):
"""
if self._conv_hyperparams:
kwargs = self._conv_hyperparams.params(**kwargs)
else:
kwargs['depthwise_initializer'] = self.initializer
kwargs['padding'] = 'same'
kernel_size = kwargs.get('kernel_size')
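A minimal sketch of how the defaults introduced above would typically be
attached to a layer when no conv_hyperparams object is supplied (the filter
count is illustrative):

import tensorflow as tf

conv = tf.keras.layers.Conv2D(
    filters=32,  # illustrative
    kernel_size=3,
    padding='same',
    kernel_regularizer=tf.keras.regularizers.l2(0.00004 * 0.5),
    kernel_initializer=tf.truncated_normal_initializer(stddev=0.09))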
......
......@@ -31,11 +31,10 @@ slim = tf.contrib.slim
# A modified config of mobilenet v1 that makes it more detection friendly.
def _create_modified_mobilenet_config():
conv_defs = copy.copy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
conv_defs = copy.deepcopy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
conv_defs[-2] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=512)
conv_defs[-1] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=256)
return conv_defs
_CONV_DEFS = _create_modified_mobilenet_config()
class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
......@@ -98,6 +97,9 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
self._fpn_min_level = fpn_min_level
self._fpn_max_level = fpn_max_level
self._additional_layer_depth = additional_layer_depth
self._conv_defs = None
if self._use_depthwise:
self._conv_defs = _create_modified_mobilenet_config()
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -141,7 +143,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
conv_defs=_CONV_DEFS if self._use_depthwise else None,
conv_defs=self._conv_defs,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
......@@ -159,7 +161,8 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
fpn_features = feature_map_generators.fpn_top_down_feature_maps(
[(key, image_features[key]) for key in feature_block_list],
depth=depth_fn(self._additional_layer_depth),
use_depthwise=self._use_depthwise)
use_depthwise=self._use_depthwise,
use_explicit_padding=self._use_explicit_padding)
feature_maps = []
for level in range(self._fpn_min_level, base_fpn_max_level + 1):
feature_maps.append(fpn_features['top_down_{}'.format(
......@@ -167,18 +170,23 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
last_feature_map = fpn_features['top_down_{}'.format(
feature_blocks[base_fpn_max_level - 2])]
# Construct coarse features
padding = 'VALID' if self._use_explicit_padding else 'SAME'
kernel_size = 3
for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
if self._use_depthwise:
conv_op = functools.partial(
slim.separable_conv2d, depth_multiplier=1)
else:
conv_op = slim.conv2d
if self._use_explicit_padding:
last_feature_map = ops.fixed_padding(
last_feature_map, kernel_size)
last_feature_map = conv_op(
last_feature_map,
num_outputs=depth_fn(self._additional_layer_depth),
kernel_size=[3, 3],
kernel_size=[kernel_size, kernel_size],
stride=2,
padding='SAME',
padding=padding,
scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13))
feature_maps.append(last_feature_map)
return feature_maps
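The ops.fixed_padding call above pads the input so that the following stride-2
VALID convolution is independent of the input size. A typical implementation
of such a helper looks roughly like this (a sketch of the common pattern, not
a verbatim copy of the library function):

import tensorflow as tf

def fixed_padding_sketch(inputs, kernel_size):
  """Pads height and width so a subsequent VALID conv needs no extra padding."""
  pad_total = kernel_size - 1
  pad_beg = pad_total // 2
  pad_end = pad_total - pad_beg
  return tf.pad(
      inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])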
......@@ -30,17 +30,14 @@ from nets.mobilenet import mobilenet_v2
slim = tf.contrib.slim
# A modified config of mobilenet v2 that makes it more detection friendly,
# A modified config of mobilenet v2 that makes it more detection friendly.
def _create_modified_mobilenet_config():
conv_defs = copy.copy(mobilenet_v2.V2_DEF)
conv_defs = copy.deepcopy(mobilenet_v2.V2_DEF)
conv_defs['spec'][-1] = mobilenet.op(
slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=256)
return conv_defs
_CONV_DEFS = _create_modified_mobilenet_config()
class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using MobilenetV2 FPN features."""
......@@ -100,6 +97,9 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
self._fpn_min_level = fpn_min_level
self._fpn_max_level = fpn_max_level
self._additional_layer_depth = additional_layer_depth
self._conv_defs = None
if self._use_depthwise:
self._conv_defs = _create_modified_mobilenet_config()
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -142,7 +142,7 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='layer_19',
depth_multiplier=self._depth_multiplier,
conv_defs=_CONV_DEFS if self._use_depthwise else None,
conv_defs=self._conv_defs,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
......@@ -158,7 +158,8 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
fpn_features = feature_map_generators.fpn_top_down_feature_maps(
[(key, image_features[key]) for key in feature_block_list],
depth=depth_fn(self._additional_layer_depth),
use_depthwise=self._use_depthwise)
use_depthwise=self._use_depthwise,
use_explicit_padding=self._use_explicit_padding)
feature_maps = []
for level in range(self._fpn_min_level, base_fpn_max_level + 1):
feature_maps.append(fpn_features['top_down_{}'.format(
......@@ -166,18 +167,23 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
last_feature_map = fpn_features['top_down_{}'.format(
feature_blocks[base_fpn_max_level - 2])]
# Construct coarse features
padding = 'VALID' if self._use_explicit_padding else 'SAME'
kernel_size = 3
for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
if self._use_depthwise:
conv_op = functools.partial(
slim.separable_conv2d, depth_multiplier=1)
else:
conv_op = slim.conv2d
if self._use_explicit_padding:
last_feature_map = ops.fixed_padding(
last_feature_map, kernel_size)
last_feature_map = conv_op(
last_feature_map,
num_outputs=depth_fn(self._additional_layer_depth),
kernel_size=[3, 3],
kernel_size=[kernel_size, kernel_size],
stride=2,
padding='SAME',
padding=padding,
scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19))
feature_maps.append(last_feature_map)
return feature_maps