Commit 505f554c authored by pkulzc, committed by Sergio Guadarrama

Internal changes to slim and object detection (#4100)

* Adding option for one_box_for_all_classes to the box_predictor

PiperOrigin-RevId: 192813444

* Extend to accept different ratios of conv channels.

PiperOrigin-RevId: 192837477

* Remove inaccurate caveat from proto file.

PiperOrigin-RevId: 192850747

* Add option to set dropout for classification net in weight shared box predictor.

PiperOrigin-RevId: 192922089

* Fix flakiness in testSSDRandomCropWithMultiClassScores due to randomness.

PiperOrigin-RevId: 193067658

* Post-process now works again in train mode.

PiperOrigin-RevId: 193087707

* Adding support for reading in logits as groundtruth labels and applying an optional temperature (scaling) before softmax in support of distillation; see the config sketch after this change list.

PiperOrigin-RevId: 193119411

* Add a util function to visualize value histogram as a tf.summary.image.

PiperOrigin-RevId: 193137342

* Do not add batch norm parameters to final conv2d ops that predict box encodings and class scores in weight shared conv box predictor.

This allows us to set proper bias and force initial predictions to be background when using focal loss.

PiperOrigin-RevId: 193204364

* Make sure the final layers are also resized proportional to conv_depth_ratio.

PiperOrigin-RevId: 193228972

* Remove deprecated batch_norm_trainable field from ssd mobilenet v2 config

PiperOrigin-RevId: 193244778

* Updating coco evaluation metrics to allow for a batch of image info, rather than a single image.

PiperOrigin-RevId: 193382651

* Update protobuf requirements to 3+ in installation docs.

PiperOrigin-RevId: 193409179

* Add support for training keypoints.

PiperOrigin-RevId: 193576336

* Fix data augmentation functions.

PiperOrigin-RevId: 193737238

* Read the default batch size from config file.

PiperOrigin-RevId: 193959861

* Fixing a bug in the coco evaluator.

PiperOrigin-RevId: 193974479

* Fix incorrect num_gt_boxes_per_image and num_det_boxes_per_image values; they should not come from the expanded dimension.

PiperOrigin-RevId: 194122420

* Add option to evaluate any checkpoint (without requiring write access to that directory and overwriting any existing logs there).

PiperOrigin-RevId: 194292198

* PiperOrigin-RevId: 190346687

* - Expose slim arg_scope function to compute keys to enable testing.
- Add is_training=None option to mobilenet arg_scopes. This allows users to set is_training from an outer scope.

PiperOrigin-RevId: 190997959

* Add an option to not set slim arg_scope for batch_norm is_training parameter. This enables users to set the is_training parameter from an outer scope.

PiperOrigin-RevId: 191611934

* PiperOrigin-RevId: 191955231

* PiperOrigin-RevId: 193254125

* PiperOrigin-RevId: 193371562

* PiperOrigin-RevId: 194085628
parent 5c78b9d7
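
The distillation change above (logits as groundtruth labels with an optional temperature) is carried by a new weighted_logits_softmax classification loss, visible in the losses_builder and losses hunks below. A minimal sketch of how it might be built from a config, modeled on the new unit tests in this commit; the object_detection import paths and the logit_scale value here are illustrative assumptions, not part of the diff:

# Sketch: build the distillation-style classification loss from a text proto,
# the same way the new builder tests below do. Assumes the object_detection
# package layout (losses_pb2, losses_builder) used elsewhere in this commit.
from google.protobuf import text_format
from object_detection.builders import losses_builder
from object_detection.protos import losses_pb2

losses_text_proto = """
  classification_loss {
    weighted_logits_softmax {
      logit_scale: 2.0  # temperature > 1 softens the teacher distribution
    }
  }
  localization_loss {
    weighted_l2 {
    }
  }
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
# build() returns (classification_loss, localization_loss, ...); only the
# classification loss matters for this sketch.
classification_loss, _, _, _, _ = losses_builder.build(losses_proto)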
@@ -80,12 +80,14 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
         num_classes=num_classes,
         conv_hyperparams_fn=conv_hyperparams_fn,
         depth=conv_box_predictor.depth,
-        num_layers_before_predictor=(conv_box_predictor.
-                                     num_layers_before_predictor),
+        num_layers_before_predictor=(
+            conv_box_predictor.num_layers_before_predictor),
         kernel_size=conv_box_predictor.kernel_size,
         box_code_size=conv_box_predictor.box_code_size,
-        class_prediction_bias_init=conv_box_predictor.class_prediction_bias_init
-    )
+        class_prediction_bias_init=conv_box_predictor.
+        class_prediction_bias_init,
+        use_dropout=conv_box_predictor.use_dropout,
+        dropout_keep_prob=conv_box_predictor.dropout_keep_probability)
     return box_predictor_object
   if box_predictor_oneof == 'mask_rcnn_box_predictor':
@@ -113,7 +115,9 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
             mask_rcnn_box_predictor.mask_prediction_conv_depth),
         masks_are_class_agnostic=(
             mask_rcnn_box_predictor.masks_are_class_agnostic),
-        predict_keypoints=mask_rcnn_box_predictor.predict_keypoints)
+        predict_keypoints=mask_rcnn_box_predictor.predict_keypoints,
+        share_box_across_classes=(
+            mask_rcnn_box_predictor.share_box_across_classes))
     return box_predictor_object
   if box_predictor_oneof == 'rfcn_box_predictor':
...
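For reference, a hedged sketch of box predictor config fragments that exercise the fields wired through above; the field names are taken from the builder code in this hunk, while the values and the surrounding context are illustrative assumptions:

weight_shared_convolutional_box_predictor {
  depth: 32
  num_layers_before_predictor: 1
  kernel_size: 3
  class_prediction_bias_init: -4.6
  use_dropout: true
  dropout_keep_probability: 0.8
}
mask_rcnn_box_predictor {
  use_dropout: true
  dropout_keep_probability: 0.8
  box_code_size: 4
  share_box_across_classes: true
}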
@@ -317,6 +317,7 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
         use_dropout: true
         dropout_keep_probability: 0.8
         box_code_size: 3
+        share_box_across_classes: true
       }
     """
     hyperparams_proto = hyperparams_pb2.Hyperparams()
@@ -338,6 +339,7 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
     self.assertEqual(box_predictor.num_classes, 90)
     self.assertTrue(box_predictor._is_training)
     self.assertEqual(box_predictor._box_code_size, 3)
+    self.assertEqual(box_predictor._share_box_across_classes, True)

   def test_build_default_mask_rcnn_box_predictor(self):
     box_predictor_proto = box_predictor_pb2.BoxPredictor()
...
@@ -121,6 +121,10 @@ def build_faster_rcnn_classification_loss(loss_config):
     config = loss_config.weighted_softmax
     return losses.WeightedSoftmaxClassificationLoss(
         logit_scale=config.logit_scale)
+  if loss_type == 'weighted_logits_softmax':
+    config = loss_config.weighted_logits_softmax
+    return losses.WeightedSoftmaxClassificationAgainstLogitsLoss(
+        logit_scale=config.logit_scale)

   # By default, Faster RCNN second stage classifier uses Softmax loss
   # with anchor-wise outputs.
@@ -193,6 +197,11 @@ def _build_classification_loss(loss_config):
     return losses.WeightedSoftmaxClassificationLoss(
         logit_scale=config.logit_scale)

+  if loss_type == 'weighted_logits_softmax':
+    config = loss_config.weighted_logits_softmax
+    return losses.WeightedSoftmaxClassificationAgainstLogitsLoss(
+        logit_scale=config.logit_scale)
+
   if loss_type == 'bootstrapped_sigmoid':
     config = loss_config.bootstrapped_sigmoid
     return losses.BootstrappedSigmoidClassificationLoss(
...
@@ -207,6 +207,24 @@ class ClassificationLossBuilderTest(tf.test.TestCase):
     self.assertTrue(isinstance(classification_loss,
                                losses.WeightedSoftmaxClassificationLoss))

+  def test_build_weighted_logits_softmax_classification_loss(self):
+    losses_text_proto = """
+      classification_loss {
+        weighted_logits_softmax {
+        }
+      }
+      localization_loss {
+        weighted_l2 {
+        }
+      }
+    """
+    losses_proto = losses_pb2.Loss()
+    text_format.Merge(losses_text_proto, losses_proto)
+    classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
+    self.assertTrue(
+        isinstance(classification_loss,
+                   losses.WeightedSoftmaxClassificationAgainstLogitsLoss))
+
   def test_build_weighted_softmax_classification_loss_with_logit_scale(self):
     losses_text_proto = """
       classification_loss {
@@ -442,6 +460,19 @@ class FasterRcnnClassificationLossBuilderTest(tf.test.TestCase):
     self.assertTrue(isinstance(classification_loss,
                                losses.WeightedSoftmaxClassificationLoss))

+  def test_build_logits_softmax_loss(self):
+    losses_text_proto = """
+      weighted_logits_softmax {
+      }
+    """
+    losses_proto = losses_pb2.ClassificationLoss()
+    text_format.Merge(losses_text_proto, losses_proto)
+    classification_loss = losses_builder.build_faster_rcnn_classification_loss(
+        losses_proto)
+    self.assertTrue(
+        isinstance(classification_loss,
+                   losses.WeightedSoftmaxClassificationAgainstLogitsLoss))
+
   def test_build_softmax_loss_by_default(self):
     losses_text_proto = """
     """
...
@@ -308,7 +308,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
                mask_prediction_num_conv_layers=2,
                mask_prediction_conv_depth=256,
                masks_are_class_agnostic=False,
-               predict_keypoints=False):
+               predict_keypoints=False,
+               share_box_across_classes=False):
     """Constructor.

     Args:
@@ -341,7 +342,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
       masks_are_class_agnostic: Boolean determining if the mask-head is
         class-agnostic or not.
       predict_keypoints: Whether to predict keypoints inside detection boxes.
+      share_box_across_classes: Whether to share boxes across classes rather
+        than use a different box for each class.

     Raises:
       ValueError: If predict_instance_masks is true but conv_hyperparams is not
@@ -362,6 +364,7 @@ class MaskRCNNBoxPredictor(BoxPredictor):
     self._mask_prediction_conv_depth = mask_prediction_conv_depth
     self._masks_are_class_agnostic = masks_are_class_agnostic
     self._predict_keypoints = predict_keypoints
+    self._share_box_across_classes = share_box_across_classes
     if self._predict_keypoints:
       raise ValueError('Keypoint prediction is unimplemented.')
     if ((self._predict_instance_masks or self._predict_keypoints) and
@@ -403,10 +406,14 @@ class MaskRCNNBoxPredictor(BoxPredictor):
       flattened_image_features = slim.dropout(flattened_image_features,
                                               keep_prob=self._dropout_keep_prob,
                                               is_training=self._is_training)
+    number_of_boxes = 1
+    if not self._share_box_across_classes:
+      number_of_boxes = self._num_classes
+
     with slim.arg_scope(self._fc_hyperparams_fn()):
       box_encodings = slim.fully_connected(
           flattened_image_features,
-          self._num_classes * self._box_code_size,
+          number_of_boxes * self._box_code_size,
           activation_fn=None,
           scope='BoxEncodingPredictor')
       class_predictions_with_background = slim.fully_connected(
@@ -415,7 +422,7 @@ class MaskRCNNBoxPredictor(BoxPredictor):
           activation_fn=None,
           scope='ClassPredictor')
     box_encodings = tf.reshape(
-        box_encodings, [-1, 1, self._num_classes, self._box_code_size])
+        box_encodings, [-1, 1, number_of_boxes, self._box_code_size])
     class_predictions_with_background = tf.reshape(
         class_predictions_with_background, [-1, 1, self._num_classes + 1])
     return box_encodings, class_predictions_with_background
@@ -778,7 +785,9 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
                num_layers_before_predictor,
                box_code_size,
                kernel_size=3,
-               class_prediction_bias_init=0.0):
+               class_prediction_bias_init=0.0,
+               use_dropout=False,
+               dropout_keep_prob=0.8):
     """Constructor.

     Args:
@@ -796,6 +805,8 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
       kernel_size: Size of final convolution kernel.
       class_prediction_bias_init: constant value to initialize bias of the last
         conv2d layer before class prediction.
+      use_dropout: Whether to apply dropout to class prediction head.
+      dropout_keep_prob: Probability of keeping activations.
     """
     super(WeightSharedConvolutionalBoxPredictor, self).__init__(is_training,
                                                                 num_classes)
@@ -805,6 +816,8 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
     self._box_code_size = box_code_size
     self._kernel_size = kernel_size
     self._class_prediction_bias_init = class_prediction_bias_init
+    self._use_dropout = use_dropout
+    self._dropout_keep_prob = dropout_keep_prob

   def _predict(self, image_features, num_predictions_per_location_list):
     """Computes encoded object locations and corresponding confidences.
@@ -867,6 +880,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
             num_predictions_per_location * self._box_code_size,
             [self._kernel_size, self._kernel_size],
             activation_fn=None, stride=1, padding='SAME',
+            normalizer_fn=None,
             scope='BoxEncodingPredictor')
         for i in range(self._num_layers_before_predictor):
@@ -877,11 +891,15 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
             stride=1,
             padding='SAME',
             scope='ClassPredictionTower/conv2d_{}'.format(i))
+        if self._use_dropout:
+          class_predictions_net = slim.dropout(
+              class_predictions_net, keep_prob=self._dropout_keep_prob)
         class_predictions_with_background = slim.conv2d(
             class_predictions_net,
             num_predictions_per_location * num_class_slots,
             [self._kernel_size, self._kernel_size],
             activation_fn=None, stride=1, padding='SAME',
+            normalizer_fn=None,
             biases_initializer=tf.constant_initializer(
                 self._class_prediction_bias_init),
             scope='ClassPredictor')
...
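To make the shared-box shape change concrete: with num_classes=5 and box_code_size=4, the Mask R-CNN box head above now emits [batch, 1, 1, 4] encodings when share_box_across_classes is set, versus [batch, 1, 5, 4] otherwise. A NumPy-only sketch of the reshape logic, illustrative rather than the predictor code itself:

import numpy as np

num_classes, box_code_size, batch = 5, 4, 2
for share_box_across_classes in (False, True):
  number_of_boxes = 1 if share_box_across_classes else num_classes
  # The fully connected layer emits number_of_boxes * box_code_size values
  # per proposal, which the predictor then reshapes:
  flat = np.zeros((batch, number_of_boxes * box_code_size), np.float32)
  box_encodings = flat.reshape(-1, 1, number_of_boxes, box_code_size)
  print(share_box_across_classes, box_encodings.shape)
  # False -> (2, 1, 5, 4); True -> (2, 1, 1, 4)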
@@ -70,6 +70,33 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
       self.assertAllEqual(box_encodings_shape, [2, 1, 5, 4])
       self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6])

+  def test_get_boxes_with_five_classes_share_box_across_classes(self):
+    image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
+    mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
+        is_training=False,
+        num_classes=5,
+        fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
+        use_dropout=False,
+        dropout_keep_prob=0.5,
+        box_code_size=4,
+        share_box_across_classes=True
+    )
+    box_predictions = mask_box_predictor.predict(
+        [image_features], num_predictions_per_location=[1],
+        scope='BoxPredictor')
+    box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
+    class_predictions_with_background = box_predictions[
+        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      (box_encodings_shape,
+       class_predictions_with_background_shape) = sess.run(
+           [tf.shape(box_encodings),
+            tf.shape(class_predictions_with_background)])
+      self.assertAllEqual(box_encodings_shape, [2, 1, 1, 4])
+      self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6])
+
   def test_value_error_on_predict_instance_masks_with_no_conv_hyperparms(self):
     with self.assertRaises(ValueError):
       box_predictor.MaskRCNNBoxPredictor(
@@ -403,9 +430,14 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
         }
       }
       initializer {
-        truncated_normal_initializer {
+        random_normal_initializer {
+          stddev: 0.01
+          mean: 0.0
         }
       }
+      batch_norm {
+        train: true,
+      }
     """
     text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
     return hyperparams_builder.build(conv_hyperparams, is_training=True)
@@ -434,6 +466,27 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
     self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
     self.assertAllEqual(objectness_predictions.shape, [4, 320, 1])

+  def test_bias_predictions_to_background_with_sigmoid_score_conversion(self):
+    def graph_fn(image_features):
+      conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
+          is_training=True,
+          num_classes=2,
+          conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
+          depth=32,
+          num_layers_before_predictor=1,
+          class_prediction_bias_init=-4.6,
+          box_code_size=4)
+      box_predictions = conv_box_predictor.predict(
+          [image_features], num_predictions_per_location=[5],
+          scope='BoxPredictor')
+      class_predictions = tf.concat(box_predictions[
+          box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
+      return (tf.nn.sigmoid(class_predictions),)
+
+    image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
+    class_predictions = self.execute(graph_fn, [image_features])
+    self.assertAlmostEqual(np.mean(class_predictions), 0.01, places=3)
+
   def test_get_multi_class_predictions_for_five_aspect_ratios_per_location(
       self):
@@ -524,19 +577,19 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
         ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
          'BoxEncodingPredictionTower/conv2d_0/weights'),
         ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-         'BoxEncodingPredictionTower/conv2d_0/biases'),
+         'BoxEncodingPredictionTower/conv2d_0/BatchNorm/beta'),
         ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
          'BoxEncodingPredictionTower/conv2d_1/weights'),
         ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-         'BoxEncodingPredictionTower/conv2d_1/biases'),
+         'BoxEncodingPredictionTower/conv2d_1/BatchNorm/beta'),
         ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
          'ClassPredictionTower/conv2d_0/weights'),
         ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-         'ClassPredictionTower/conv2d_0/biases'),
+         'ClassPredictionTower/conv2d_0/BatchNorm/beta'),
         ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
          'ClassPredictionTower/conv2d_1/weights'),
         ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-         'ClassPredictionTower/conv2d_1/biases'),
+         'ClassPredictionTower/conv2d_1/BatchNorm/beta'),
         ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
          'BoxEncodingPredictor/weights'),
         ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
...
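A quick sanity check of the bias value used in the new bias-prediction test above: with class_prediction_bias_init=-4.6 and a sigmoid score conversion, the initial class scores land near 0.01, i.e. predictions start out as background, which is what the assertAlmostEqual(..., 0.01, places=3) verifies:

import math
# sigmoid(-4.6) = 1 / (1 + e^4.6) ~= 0.00995, matching the 0.01 the test expects.
print(1.0 / (1.0 + math.exp(4.6)))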
@@ -23,6 +23,7 @@ Localization losses:
 Classification losses:
  * WeightedSigmoidClassificationLoss
  * WeightedSoftmaxClassificationLoss
+ * WeightedSoftmaxClassificationAgainstLogitsLoss
  * BootstrappedSigmoidClassificationLoss
 """
 from abc import ABCMeta
@@ -317,6 +318,54 @@ class WeightedSoftmaxClassificationLoss(Loss):
     return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights

+class WeightedSoftmaxClassificationAgainstLogitsLoss(Loss):
+  """Softmax loss function against logits.
+
+  Targets are expected to be provided in logits space instead of "one hot" or
+  "probability distribution" space.
+  """
+
+  def __init__(self, logit_scale=1.0):
+    """Constructor.
+
+    Args:
+      logit_scale: When this value is high, the target is "diffused" and
+                   when this value is low, the target is made peakier.
+                   (default 1.0)
+    """
+    self._logit_scale = logit_scale
+
+  def _scale_and_softmax_logits(self, logits):
+    """Scale logits then apply softmax."""
+    scaled_logits = tf.divide(logits, self._logit_scale, name='scale_logits')
+    return tf.nn.softmax(scaled_logits, name='convert_scores')
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing logit classification targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a float tensor of shape [batch_size, num_anchors]
+        representing the value of the loss function.
+    """
+    num_classes = prediction_tensor.get_shape().as_list()[-1]
+    target_tensor = self._scale_and_softmax_logits(target_tensor)
+    prediction_tensor = tf.divide(prediction_tensor, self._logit_scale,
+                                  name='scale_logits')
+
+    per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
+        labels=tf.reshape(target_tensor, [-1, num_classes]),
+        logits=tf.reshape(prediction_tensor, [-1, num_classes])))
+    return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
+
+
 class BootstrappedSigmoidClassificationLoss(Loss):
   """Bootstrapped sigmoid cross entropy classification loss function.
...
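A worked example of what the new loss computes, in plain NumPy rather than TensorFlow (illustrative only): predictions and targets are both divided by logit_scale, the scaled targets are turned into a softened distribution via softmax, and the loss is the cross entropy against that distribution. With a perfect match, the loss reduces to the entropy of the softened target, which is exactly what the logit-scale test below asserts:

import numpy as np

def softmax(x):
  e = np.exp(x - x.max(axis=-1, keepdims=True))
  return e / e.sum(axis=-1, keepdims=True)

logit_scale = 100.0  # high temperature -> diffused target
target_logits = np.array([0.0, 0.0, -100.0])
pred_logits = np.array([0.0, 0.0, -100.0])  # identical to the target

soft_targets = softmax(target_logits / logit_scale)   # ~[0.42, 0.42, 0.16]
log_probs = np.log(softmax(pred_logits / logit_scale))
cross_ent = -(soft_targets * log_probs).sum()
# cross_ent equals the entropy of soft_targets (~1.02), the exp_entropy2
# value computed in testReturnsCorrectAnchorWiseLossWithLogitScaleSetting.
print(soft_targets, cross_ent)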
@@ -576,6 +576,111 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
       self.assertAllClose(loss_output, exp_loss)

+class WeightedSoftmaxClassificationAgainstLogitsLossTest(tf.test.TestCase):
+
+  def testReturnsCorrectLoss(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [0, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 0],
+                                      [-100, 100, -100],
+                                      [-100, 100, -100],
+                                      [100, -100, -100]]], tf.float32)
+    target_tensor = tf.constant([[[-100, 100, -100],
+                                  [100, -100, -100],
+                                  [100, -100, -100],
+                                  [-100, -100, 100]],
+                                 [[-100, -100, 100],
+                                  [-100, 100, -100],
+                                  [-100, 100, -100],
+                                  [100, -100, -100]]], tf.float32)
+    weights = tf.constant([[1, 1, .5, 1],
+                           [1, 1, 1, 1]], tf.float32)
+    loss_op = losses.WeightedSoftmaxClassificationAgainstLogitsLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+    loss = tf.reduce_sum(loss)
+    exp_loss = - 1.5 * math.log(.5)
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+  def testReturnsCorrectAnchorWiseLoss(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [0, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 0],
+                                      [-100, 100, -100],
+                                      [-100, 100, -100],
+                                      [100, -100, -100]]], tf.float32)
+    target_tensor = tf.constant([[[-100, 100, -100],
+                                  [100, -100, -100],
+                                  [100, -100, -100],
+                                  [-100, -100, 100]],
+                                 [[-100, -100, 100],
+                                  [-100, 100, -100],
+                                  [-100, 100, -100],
+                                  [100, -100, -100]]], tf.float32)
+    weights = tf.constant([[1, 1, .5, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    loss_op = losses.WeightedSoftmaxClassificationAgainstLogitsLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+    exp_loss = np.matrix([[0, 0, - 0.5 * math.log(.5), 0],
+                          [-math.log(.5), 0, 0, 0]])
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+  def testReturnsCorrectAnchorWiseLossWithLogitScaleSetting(self):
+    logit_scale = 100.
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [0, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 0],
+                                      [-100, 100, -100],
+                                      [-100, 100, -100],
+                                      [100, -100, -100]]], tf.float32)
+    target_tensor = tf.constant([[[-100, 100, -100],
+                                  [100, -100, -100],
+                                  [0, 0, -100],
+                                  [-100, -100, 100]],
+                                 [[-100, 0, 0],
+                                  [-100, 100, -100],
+                                  [-100, 100, -100],
+                                  [100, -100, -100]]], tf.float32)
+    weights = tf.constant([[1, 1, .5, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    loss_op = losses.WeightedSoftmaxClassificationAgainstLogitsLoss(
+        logit_scale=logit_scale)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    # find softmax of the two prediction types above
+    softmax_pred1 = [np.exp(-1), np.exp(-1), np.exp(1)]
+    softmax_pred1 /= sum(softmax_pred1)
+    softmax_pred2 = [np.exp(0), np.exp(0), np.exp(-1)]
+    softmax_pred2 /= sum(softmax_pred2)
+
+    # compute the expected cross entropy for perfect matches
+    exp_entropy1 = sum([-x * np.log(x) for x in softmax_pred1])
+    exp_entropy2 = sum([-x * np.log(x) for x in softmax_pred2])
+
+    # weighted expected losses
+    exp_loss = np.matrix(
+        [[exp_entropy1, exp_entropy1, exp_entropy2 * .5, exp_entropy1],
+         [exp_entropy2, exp_entropy1, exp_entropy1, 0.]])
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+
 class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):

   def testReturnsCorrectLossSoftBootstrapping(self):
...
@@ -1000,8 +1000,8 @@ def random_adjust_saturation(image,
 def random_distort_color(image, color_ordering=0, preprocess_vars_cache=None):
   """Randomly distorts color.

-  Randomly distorts color using a combination of brightness, hue, contrast
-  and saturation changes. Makes sure the output image is still between 0 and 255.
+  Randomly distorts color using a combination of brightness, hue, contrast and
+  saturation changes. Makes sure the output image is still between 0 and 255.

   Args:
     image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
...
@@ -2620,16 +2620,24 @@ class PreprocessorTest(tf.test.TestCase):
     distorted_images_rank = tf.rank(distorted_images)
     boxes_rank = tf.rank(boxes)
     distorted_boxes_rank = tf.rank(distorted_boxes)
+    multiclass_scores_rank = tf.rank(multiclass_scores)
+    distorted_multiclass_scores_rank = tf.rank(distorted_multiclass_scores)

     with self.test_session() as sess:
-      (boxes_rank_, distorted_boxes_rank_, images_rank_, distorted_images_rank_,
-       multiclass_scores_, distorted_multiclass_scores_) = sess.run([
-           boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank,
-           multiclass_scores, distorted_multiclass_scores
+      (boxes_rank_, distorted_boxes_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_, multiclass_scores_rank_,
+       distorted_multiclass_scores_,
+       distorted_multiclass_scores_rank_) = sess.run([
+           boxes_rank, distorted_boxes, distorted_boxes_rank, images_rank,
+           distorted_images_rank, multiclass_scores_rank,
+           distorted_multiclass_scores, distorted_multiclass_scores_rank
       ])
       self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
       self.assertAllEqual(images_rank_, distorted_images_rank_)
-      self.assertAllEqual(multiclass_scores_, distorted_multiclass_scores_)
+      self.assertAllEqual(multiclass_scores_rank_,
+                          distorted_multiclass_scores_rank_)
+      self.assertAllEqual(distorted_boxes_.shape[0],
+                          distorted_multiclass_scores_.shape[0])

   def testSSDRandomCropPad(self):
     images = self.createTestImages()
...
@@ -111,7 +111,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
                instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
                label_map_proto_file=None,
                use_display_name=False,
-               dct_method=''):
+               dct_method='',
+               num_keypoints=0):
     """Constructor sets keys_to_features and items_to_handlers.

     Args:
@@ -131,6 +132,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         algorithm used for jpeg decompression. Currently valid values
         are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for
         example, the jpeg library does not have that specific option.
+      num_keypoints: the number of keypoints per object.

     Raises:
       ValueError: If `instance_mask_type` option is not one of
@@ -149,9 +151,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         'image/source_id':
             tf.FixedLenFeature((), tf.string, default_value=''),
         'image/height':
-            tf.FixedLenFeature((), tf.int64, 1),
+            tf.FixedLenFeature((), tf.int64, default_value=1),
         'image/width':
-            tf.FixedLenFeature((), tf.int64, 1),
+            tf.FixedLenFeature((), tf.int64, default_value=1),
         # Object boxes and classes.
         'image/object/bbox/xmin':
             tf.VarLenFeature(tf.float32),
@@ -209,6 +211,16 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         fields.InputDataFields.groundtruth_weights: (
             slim_example_decoder.Tensor('image/object/weight')),
     }
+    self._num_keypoints = num_keypoints
+    if num_keypoints > 0:
+      self.keys_to_features['image/object/keypoint/x'] = (
+          tf.VarLenFeature(tf.float32))
+      self.keys_to_features['image/object/keypoint/y'] = (
+          tf.VarLenFeature(tf.float32))
+      self.items_to_handlers[fields.InputDataFields.groundtruth_keypoints] = (
+          slim_example_decoder.ItemHandlerCallback(
+              ['image/object/keypoint/y', 'image/object/keypoint/x'],
+              self._reshape_keypoints))
     if load_instance_masks:
       if instance_mask_type in (input_reader_pb2.DEFAULT,
                                 input_reader_pb2.NUMERICAL_MASKS):
@@ -286,6 +298,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         [None] indicating if the boxes represent `difficult` instances.
       fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
         [None] indicating if the boxes represent `group_of` instances.
+      fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
+        shape [None, None, 2] containing keypoints, where the coordinates of
+        the keypoints are ordered (y, x).
       fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
         shape [None, None, None] containing instance masks.
     """
@@ -314,6 +329,31 @@ class TfExampleDecoder(data_decoder.DataDecoder):
                        default_groundtruth_weights)
     return tensor_dict

+  def _reshape_keypoints(self, keys_to_tensors):
+    """Reshape keypoints.
+
+    The keypoints are reshaped to [num_instances, num_keypoints, 2].
+
+    Args:
+      keys_to_tensors: a dictionary from keys to tensors.
+
+    Returns:
+      A 3-D float tensor of shape [num_instances, num_keypoints, 2] containing
+      keypoint coordinates ordered (y, x).
+    """
+    y = keys_to_tensors['image/object/keypoint/y']
+    if isinstance(y, tf.SparseTensor):
+      y = tf.sparse_tensor_to_dense(y)
+    y = tf.expand_dims(y, 1)
+    x = keys_to_tensors['image/object/keypoint/x']
+    if isinstance(x, tf.SparseTensor):
+      x = tf.sparse_tensor_to_dense(x)
+    x = tf.expand_dims(x, 1)
+    keypoints = tf.concat([y, x], 1)
+    keypoints = tf.reshape(keypoints, [-1, self._num_keypoints, 2])
+    return keypoints
+
   def _reshape_instance_masks(self, keys_to_tensors):
     """Reshape instance segmentation masks.
...
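To illustrate the new _reshape_keypoints handler with concrete numbers (the same values the decoder test below uses): the flat per-image y and x coordinate lists are paired into (y, x) rows and folded into [num_instances, num_keypoints, 2]. A NumPy equivalent, illustrative only:

import numpy as np

keypoint_ys = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0], np.float32)
keypoint_xs = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], np.float32)
num_keypoints = 3  # two boxes with three keypoints each

# Mirrors the decoder logic: pair up (y, x), then fold by num_keypoints.
keypoints = np.stack([keypoint_ys, keypoint_xs], axis=1)
keypoints = keypoints.reshape(-1, num_keypoints, 2)
print(keypoints.shape)  # (2, 3, 2), i.e. [num_instances, num_keypoints, 2]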
@@ -304,6 +304,50 @@ class TfExampleDecoderTest(tf.test.TestCase):
       self.assertAllEqual(
           2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])

+  def testDecodeKeypoint(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_ymins = [0.0, 4.0]
+    bbox_xmins = [1.0, 5.0]
+    bbox_ymaxs = [2.0, 6.0]
+    bbox_xmaxs = [3.0, 7.0]
+    keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+    keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+    example = tf.train.Example(features=tf.train.Features(feature={
+        'image/encoded': self._BytesFeature(encoded_jpeg),
+        'image/format': self._BytesFeature('jpeg'),
+        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
+        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
+        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
+        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
+        'image/object/keypoint/y': self._FloatFeature(keypoint_ys),
+        'image/object/keypoint/x': self._FloatFeature(keypoint_xs),
+    })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
+                         get_shape().as_list()), [None, 4])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.
+                         groundtruth_keypoints].
+                         get_shape().as_list()), [None, 3, 2])
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+      expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
+                                  bbox_ymaxs, bbox_xmaxs]).transpose()
+      self.assertAllEqual(
+          expected_boxes,
+          tensor_dict[fields.InputDataFields.groundtruth_boxes])
+      self.assertAllEqual(
+          2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
+      expected_keypoints = (
+          np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
+      self.assertAllEqual(
+          expected_keypoints,
+          tensor_dict[fields.InputDataFields.groundtruth_keypoints])
+
   def testDecodeDefaultGroundtruthWeights(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
...
@@ -4,7 +4,7 @@
 Tensorflow Object Detection API depends on the following libraries:

-* Protobuf 2.6
+* Protobuf 3+
 * Python-tk
 * Pillow 1.0
 * lxml
...
@@ -599,9 +599,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
         (and if number_of_stages > 1):
         7) refined_box_encodings: a 3-D tensor with shape
-          [total_num_proposals, num_classes, 4] representing predicted
-          (final) refined box encodings, where
-          total_num_proposals=batch_size*self._max_num_proposals
+          [total_num_proposals, num_classes, self._box_coder.code_size]
+          representing predicted (final) refined box encodings, where
+          total_num_proposals=batch_size*self._max_num_proposals. If using
+          a shared box across classes the shape will instead be
+          [total_num_proposals, 1, self._box_coder.code_size].
         8) class_predictions_with_background: a 3-D tensor with shape
           [total_num_proposals, num_classes + 1] containing class
           predictions (logits) for each of the anchors, where
@@ -712,9 +714,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       prediction_dict: a dictionary holding "raw" prediction tensors:
         1) refined_box_encodings: a 3-D tensor with shape
-          [total_num_proposals, num_classes, 4] representing predicted
-          (final) refined box encodings, where
-          total_num_proposals=batch_size*self._max_num_proposals
+          [total_num_proposals, num_classes, self._box_coder.code_size]
+          representing predicted (final) refined box encodings, where
+          total_num_proposals=batch_size*self._max_num_proposals. If using a
+          shared box across classes the shape will instead be
+          [total_num_proposals, 1, self._box_coder.code_size].
         2) class_predictions_with_background: a 3-D tensor with shape
           [total_num_proposals, num_classes + 1] containing class
           predictions (logits) for each of the anchors, where
@@ -791,9 +795,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Args:
       prediction_dict: a dictionary holding "raw" prediction tensors:
         1) refined_box_encodings: a 3-D tensor with shape
-          [total_num_proposals, num_classes, 4] representing predicted
-          (final) refined box encodings, where
-          total_num_proposals=batch_size*self._max_num_proposals
+          [total_num_proposals, num_classes, self._box_coder.code_size]
+          representing predicted (final) refined box encodings, where
+          total_num_proposals=batch_size*self._max_num_proposals. If using a
+          shared box across classes the shape will instead be
+          [total_num_proposals, 1, self._box_coder.code_size].
         2) class_predictions_with_background: a 3-D tensor with shape
           [total_num_proposals, num_classes + 1] containing class
           predictions (logits) for each of the anchors, where
@@ -823,13 +829,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
       if self._is_training:
         curr_box_classifier_features = prediction_dict['box_classifier_features']
         detection_classes = prediction_dict['class_predictions_with_background']
-        box_predictions = self._mask_rcnn_box_predictor.predict(
+        mask_predictions = self._mask_rcnn_box_predictor.predict(
             [curr_box_classifier_features],
             num_predictions_per_location=[1],
             scope=self.second_stage_box_predictor_scope,
             predict_boxes_and_classes=False,
             predict_auxiliary_outputs=True)
-        prediction_dict['mask_predictions'] = tf.squeeze(box_predictions[
+        prediction_dict['mask_predictions'] = tf.squeeze(mask_predictions[
             box_predictor.MASK_PREDICTIONS], axis=1)
       else:
         detections_dict = self._postprocess_box_classifier(
@@ -854,14 +860,14 @@ class FasterRCNNMetaArch(model.DetectionModel):
                 flattened_detected_feature_maps,
                 scope=self.second_stage_feature_extractor_scope))

-        box_predictions = self._mask_rcnn_box_predictor.predict(
+        mask_predictions = self._mask_rcnn_box_predictor.predict(
            [curr_box_classifier_features],
            num_predictions_per_location=[1],
            scope=self.second_stage_box_predictor_scope,
            predict_boxes_and_classes=False,
            predict_auxiliary_outputs=True)
-        detection_masks = tf.squeeze(box_predictions[
+        detection_masks = tf.squeeze(mask_predictions[
            box_predictor.MASK_PREDICTIONS], axis=1)

         _, num_classes, mask_height, mask_width = (
@@ -1098,8 +1104,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
             tf.to_float(num_proposals),
     }

+    # TODO(jrru): Remove mask_predictions from _post_process_box_classifier.
     with tf.name_scope('SecondStagePostprocessor'):
-      if self._number_of_stages == 2:
+      if (self._number_of_stages == 2 or
+          (self._number_of_stages == 3 and self._is_training)):
         mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
         detections_dict = self._postprocess_box_classifier(
             prediction_dict['refined_box_encodings'],
@@ -1438,8 +1446,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Args:
       refined_box_encodings: a 3-D float tensor with shape
-        [total_num_padded_proposals, num_classes, 4] representing predicted
-        (final) refined box encodings.
+        [total_num_padded_proposals, num_classes, self._box_coder.code_size]
+        representing predicted (final) refined box encodings. If using a shared
+        box across classes the shape will instead be
+        [total_num_padded_proposals, 1, 4]
       class_predictions_with_background: a 3-D float tensor with shape
         [total_num_padded_proposals, num_classes + 1] containing class
         predictions (logits) for each of the proposals. Note that this tensor
@@ -1466,10 +1476,12 @@ class FasterRCNNMetaArch(model.DetectionModel):
       that a pixel-wise sigmoid score converter is applied to the detection
       masks.
     """
-    refined_box_encodings_batch = tf.reshape(refined_box_encodings,
-                                             [-1, self.max_num_proposals,
-                                              self.num_classes,
-                                              self._box_coder.code_size])
+    refined_box_encodings_batch = tf.reshape(
+        refined_box_encodings,
+        [-1,
+         self.max_num_proposals,
+         refined_box_encodings.shape[1],
+         self._box_coder.code_size])
     class_predictions_with_background_batch = tf.reshape(
         class_predictions_with_background,
         [-1, self.max_num_proposals, self.num_classes + 1]
@@ -1517,13 +1529,18 @@ class FasterRCNNMetaArch(model.DetectionModel):
       box_encodings: a 4-D tensor with shape
         [batch_size, num_anchors, num_classes, self._box_coder.code_size]
         representing box encodings.
-      anchor_boxes: [batch_size, num_anchors, 4] representing
-        decoded bounding boxes.
+      anchor_boxes: [batch_size, num_anchors, self._box_coder.code_size]
+        representing decoded bounding boxes. If using a shared box across
+        classes the shape will instead be
+        [total_num_proposals, 1, self._box_coder.code_size].

     Returns:
-      decoded_boxes: a [batch_size, num_anchors, num_classes, 4]
-        float tensor representing bounding box predictions
-        (for each image in batch, proposal and class).
+      decoded_boxes: a
+        [batch_size, num_anchors, num_classes, self._box_coder.code_size]
+        float tensor representing bounding box predictions (for each image in
+        batch, proposal and class). If using a shared box across classes the
+        shape will instead be
+        [batch_size, num_anchors, 1, self._box_coder.code_size].
     """
     combined_shape = shape_utils.combined_static_and_dynamic_shape(
         box_encodings)
@@ -1697,7 +1714,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Args:
       refined_box_encodings: a 3-D tensor with shape
         [total_num_proposals, num_classes, box_coder.code_size] representing
-        predicted (final) refined box encodings.
+        predicted (final) refined box encodings. If using a shared box across
+        classes this will instead have shape
+        [total_num_proposals, 1, box_coder.code_size].
       class_predictions_with_background: a 2-D tensor with shape
         [total_num_proposals, num_classes + 1] containing class
         predictions (logits) for each of the anchors. Note that this tensor
@@ -1748,31 +1767,39 @@ class FasterRCNNMetaArch(model.DetectionModel):
         self._detector_target_assigner, proposal_boxlists,
         groundtruth_boxlists, groundtruth_classes_with_background_list)

-    # We only predict refined location encodings for the non background
-    # classes, but we now pad it to make it compatible with the class
-    # predictions
+    class_predictions_with_background = tf.reshape(
+        class_predictions_with_background,
+        [batch_size, self.max_num_proposals, -1])
     flat_cls_targets_with_background = tf.reshape(
         batch_cls_targets_with_background,
         [batch_size * self.max_num_proposals, -1])
-    refined_box_encodings_with_background = tf.pad(
-        refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
-    # For anchors with multiple labels, picks refined_location_encodings
-    # for just one class to avoid over-counting for regression loss and
-    # (optionally) mask loss.
     one_hot_flat_cls_targets_with_background = tf.argmax(
         flat_cls_targets_with_background, axis=1)
     one_hot_flat_cls_targets_with_background = tf.one_hot(
         one_hot_flat_cls_targets_with_background,
         flat_cls_targets_with_background.get_shape()[1])
-    refined_box_encodings_masked_by_class_targets = tf.boolean_mask(
-        refined_box_encodings_with_background,
-        tf.greater(one_hot_flat_cls_targets_with_background, 0))
-    class_predictions_with_background = tf.reshape(
-        class_predictions_with_background,
-        [batch_size, self.max_num_proposals, -1])
-    reshaped_refined_box_encodings = tf.reshape(
-        refined_box_encodings_masked_by_class_targets,
-        [batch_size, -1, 4])
+
+    # If using a shared box across classes use directly
+    if refined_box_encodings.shape[1] == 1:
+      reshaped_refined_box_encodings = tf.reshape(
+          refined_box_encodings,
+          [batch_size, self.max_num_proposals, self._box_coder.code_size])
+    # For anchors with multiple labels, picks refined_location_encodings
+    # for just one class to avoid over-counting for regression loss and
+    # (optionally) mask loss.
+    else:
+      # We only predict refined location encodings for the non background
+      # classes, but we now pad it to make it compatible with the class
+      # predictions
+      refined_box_encodings_with_background = tf.pad(
+          refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
+      refined_box_encodings_masked_by_class_targets = tf.boolean_mask(
+          refined_box_encodings_with_background,
+          tf.greater(one_hot_flat_cls_targets_with_background, 0))
+      reshaped_refined_box_encodings = tf.reshape(
+          refined_box_encodings_masked_by_class_targets,
+          [batch_size, self.max_num_proposals, self._box_coder.code_size])

     second_stage_loc_losses = self._second_stage_localization_loss(
         reshaped_refined_box_encodings,
...
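To illustrate the two localization-loss branches added above: with a shared box the encodings are reshaped directly, while with per-class boxes a background slot is padded in front and tf.boolean_mask keeps only each proposal's target-class encoding. A NumPy sketch of the per-class path with illustrative values:

import numpy as np

num_proposals, num_classes, code_size = 3, 2, 4
refined = np.arange(num_proposals * num_classes * code_size,
                    dtype=np.float32).reshape(num_proposals, num_classes,
                                              code_size)
# One-hot class targets including the background column at index 0.
cls_targets = np.array([[0, 1, 0],
                        [0, 0, 1],
                        [1, 0, 0]], np.float32)

# Pad a background slot in front of the class dimension, then keep only the
# encoding for each proposal's target class (background rows pick the pad).
padded = np.pad(refined, [(0, 0), (1, 0), (0, 0)])
selected = padded[cls_targets > 0]  # analogous to tf.boolean_mask
print(selected.shape)  # (3, 4): one code_size vector per proposal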
@@ -85,6 +85,46 @@ class FasterRCNNMetaArchTest(
     self.assertTrue(np.amax(detections_out['detection_masks'] <= 1.0))
     self.assertTrue(np.amin(detections_out['detection_masks'] >= 0.0))

+  def test_postprocess_second_stage_only_inference_mode_with_shared_boxes(self):
+    model = self._build_model(
+        is_training=False, number_of_stages=2, second_stage_batch_size=6)
+    batch_size = 2
+    total_num_padded_proposals = batch_size * model.max_num_proposals
+    proposal_boxes = tf.constant(
+        [[[1, 1, 2, 3],
+          [0, 0, 1, 1],
+          [.5, .5, .6, .6],
+          4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
+         [[2, 3, 6, 8],
+          [1, 2, 5, 3],
+          4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=tf.float32)
+    num_proposals = tf.constant([3, 2], dtype=tf.int32)
+    # This has 1 box instead of one for each class.
+    refined_box_encodings = tf.zeros(
+        [total_num_padded_proposals, 1, 4], dtype=tf.float32)
+    class_predictions_with_background = tf.ones(
+        [total_num_padded_proposals, model.num_classes+1], dtype=tf.float32)
+    image_shape = tf.constant([batch_size, 36, 48, 3], dtype=tf.int32)
+
+    _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
+    detections = model.postprocess({
+        'refined_box_encodings': refined_box_encodings,
+        'class_predictions_with_background': class_predictions_with_background,
+        'num_proposals': num_proposals,
+        'proposal_boxes': proposal_boxes,
+        'image_shape': image_shape,
+    }, true_image_shapes)
+    with self.test_session() as sess:
+      detections_out = sess.run(detections)
+      self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
+      self.assertAllClose(detections_out['detection_scores'],
+                          [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]])
+      self.assertAllClose(detections_out['detection_classes'],
+                          [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
+      self.assertAllClose(detections_out['num_detections'], [5, 4])
+
   @parameterized.parameters(
       {'masks_are_class_agnostic': False},
       {'masks_are_class_agnostic': True},
...
...@@ -1284,6 +1284,106 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase): ...@@ -1284,6 +1284,106 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
self.assertAllClose(loss_dict_out[ self.assertAllClose(loss_dict_out[
'Loss/BoxClassifierLoss/classification_loss'], 0) 'Loss/BoxClassifierLoss/classification_loss'], 0)
def test_loss_full_with_shared_boxes(self):
model = self._build_model(
is_training=True, number_of_stages=2, second_stage_batch_size=6)
batch_size = 2
anchors = tf.constant(
[[0, 0, 16, 16],
[0, 16, 16, 32],
[16, 0, 32, 16],
[16, 16, 32, 32]], dtype=tf.float32)
rpn_box_encodings = tf.zeros(
[batch_size,
anchors.get_shape().as_list()[0],
BOX_CODE_SIZE], dtype=tf.float32)
# use different numbers for the objectness category to break ties in
# order of boxes returned by NMS
rpn_objectness_predictions_with_background = tf.constant([
[[-10, 13],
[10, -10],
[10, -11],
[-10, 12]],
[[10, -10],
[-10, 13],
[-10, 12],
[10, -11]]], dtype=tf.float32)
image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
num_proposals = tf.constant([6, 6], dtype=tf.int32)
proposal_boxes = tf.constant(
2 * [[[0, 0, 16, 16],
[0, 16, 16, 32],
[16, 0, 32, 16],
[16, 16, 32, 32],
[0, 0, 16, 16],
[0, 16, 16, 32]]], dtype=tf.float32)
refined_box_encodings = tf.zeros(
(batch_size * model.max_num_proposals,
1, # one box shared among all the classes
BOX_CODE_SIZE), dtype=tf.float32)
class_predictions_with_background = tf.constant(
[[-10, 10, -10], # first image
[10, -10, -10],
[10, -10, -10],
[-10, -10, 10],
[-10, 10, -10],
[10, -10, -10],
[10, -10, -10], # second image
[-10, 10, -10],
[-10, 10, -10],
[10, -10, -10],
[10, -10, -10],
[-10, 10, -10]], dtype=tf.float32)
mask_predictions_logits = 20 * tf.ones((batch_size *
model.max_num_proposals,
model.num_classes,
14, 14),
dtype=tf.float32)
groundtruth_boxes_list = [
tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
    # Set all elements of groundtruth mask to 1.0. In this case all proposal
    # crops of the groundtruth masks should return a mask that covers the
    # entire proposal. Thus, if mask_predictions_logits element values are all
    # 20 or greater, the loss should be effectively zero.
groundtruth_masks_list = [tf.convert_to_tensor(np.ones((2, 32, 32)),
dtype=tf.float32),
tf.convert_to_tensor(np.ones((2, 32, 32)),
dtype=tf.float32)]
prediction_dict = {
'rpn_box_encodings': rpn_box_encodings,
'rpn_objectness_predictions_with_background':
rpn_objectness_predictions_with_background,
'image_shape': image_shape,
'anchors': anchors,
'refined_box_encodings': refined_box_encodings,
'class_predictions_with_background': class_predictions_with_background,
'proposal_boxes': proposal_boxes,
'num_proposals': num_proposals,
'mask_predictions': mask_predictions_logits
}
_, true_image_shapes = model.preprocess(tf.zeros(image_shape))
model.provide_groundtruth(groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list)
loss_dict = model.loss(prediction_dict, true_image_shapes)
with self.test_session() as sess:
loss_dict_out = sess.run(loss_dict)
self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0)
self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0)
self.assertAllClose(loss_dict_out[
'Loss/BoxClassifierLoss/localization_loss'], 0)
self.assertAllClose(loss_dict_out[
'Loss/BoxClassifierLoss/classification_loss'], 0)
self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0)
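Why logits of 20 make the mask loss vanish: with all-ones groundtruth masks, the per-pixel sigmoid cross entropy at logit x and target 1 is log(1 + exp(-x)). A quick NumPy check (illustrative arithmetic, not the model code):

import numpy as np

x = 20.0  # the constant used for mask_predictions_logits above
per_pixel_loss = np.log1p(np.exp(-x))  # sigmoid cross entropy with target 1
print(per_pixel_loss)  # ~2.06e-09, i.e. zero within assertAllClose tolerance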
  def test_restore_map_for_classification_ckpt(self):
    # Define mock tensorflow classification graph and save variables.
    test_graph_classification = tf.Graph()
...
...@@ -203,26 +203,39 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
  def get_estimator_eval_metric_ops(self, image_id, groundtruth_boxes,
                                    groundtruth_classes, detection_boxes,
                                    detection_scores, detection_classes,
                                    num_gt_boxes_per_image=None,
                                    num_det_boxes_per_image=None):
    """Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`.

    Note that once value_op is called, the detections and groundtruth added via
    update_op are cleared.

    This function can take in groundtruth and detections for a batch of images,
    or for a single image. For the latter case, the batch dimension for input
    tensors need not be present.

    Args:
      image_id: string/integer tensor of shape [batch] with unique identifiers
        for the images.
      groundtruth_boxes: float32 tensor of shape [batch, num_boxes, 4]
        containing `num_boxes` groundtruth boxes of the format
        [ymin, xmin, ymax, xmax] in absolute image coordinates.
      groundtruth_classes: int32 tensor of shape [batch, num_boxes] containing
        1-indexed groundtruth classes for the boxes.
      detection_boxes: float32 tensor of shape [batch, num_boxes, 4] containing
        `num_boxes` detection boxes of the format [ymin, xmin, ymax, xmax]
        in absolute image coordinates.
      detection_scores: float32 tensor of shape [batch, num_boxes] containing
        detection scores for the boxes.
      detection_classes: int32 tensor of shape [batch, num_boxes] containing
        1-indexed detection classes for the boxes.
      num_gt_boxes_per_image: int32 tensor of shape [batch] containing the
        number of groundtruth boxes per image. If None, will assume no padding
        in groundtruth tensors.
      num_det_boxes_per_image: int32 tensor of shape [batch] containing the
        number of detection boxes per image. If None, will assume no padding in
        the detection tensors.

    Returns:
      a dictionary of metric names to tuple of value_op and update_op that can
...@@ -231,28 +244,68 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
      guarantee correct behaviour.
    """
    def update_op(
        image_id_batched,
        groundtruth_boxes_batched,
        groundtruth_classes_batched,
        num_gt_boxes_per_image,
        detection_boxes_batched,
        detection_scores_batched,
        detection_classes_batched,
        num_det_boxes_per_image):
      """Update operation for adding batch of images to Coco evaluator."""

      for (image_id, gt_box, gt_class, num_gt_box, det_box, det_score,
           det_class, num_det_box) in zip(
               image_id_batched, groundtruth_boxes_batched,
               groundtruth_classes_batched, num_gt_boxes_per_image,
               detection_boxes_batched, detection_scores_batched,
               detection_classes_batched, num_det_boxes_per_image):
        self.add_single_ground_truth_image_info(
            image_id,
            {'groundtruth_boxes': gt_box[:num_gt_box],
             'groundtruth_classes': gt_class[:num_gt_box]})
        self.add_single_detected_image_info(
            image_id,
            {'detection_boxes': det_box[:num_det_box],
             'detection_scores': det_score[:num_det_box],
             'detection_classes': det_class[:num_det_box]})
if not image_id.shape.as_list():
# Apply a batch dimension to all tensors.
image_id = tf.expand_dims(image_id, 0)
groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0)
groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
detection_boxes = tf.expand_dims(detection_boxes, 0)
detection_scores = tf.expand_dims(detection_scores, 0)
detection_classes = tf.expand_dims(detection_classes, 0)
if num_gt_boxes_per_image is None:
num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2]
else:
num_gt_boxes_per_image = tf.expand_dims(num_gt_boxes_per_image, 0)
if num_det_boxes_per_image is None:
num_det_boxes_per_image = tf.shape(detection_boxes)[1:2]
else:
num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0)
else:
if num_gt_boxes_per_image is None:
num_gt_boxes_per_image = tf.tile(
tf.shape(groundtruth_boxes)[1:2],
multiples=tf.shape(groundtruth_boxes)[0:1])
if num_det_boxes_per_image is None:
num_det_boxes_per_image = tf.tile(
tf.shape(detection_boxes)[1:2],
multiples=tf.shape(detection_boxes)[0:1])
    update_op = tf.py_func(update_op, [image_id,
                                       groundtruth_boxes,
                                       groundtruth_classes,
                                       num_gt_boxes_per_image,
                                       detection_boxes,
                                       detection_scores,
                                       detection_classes,
                                       num_det_boxes_per_image], [])
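The net effect of the shape handling above, reduced to plain NumPy (illustrative values): rows beyond each image's valid-box count are sliced off before the per-image results reach the COCO accumulator.

import numpy as np

gt_boxes = np.array([[100., 100., 200., 200.],
                     [-1., -1., -1., -1.]])  # second row is padding
num_gt_boxes = 1
print(gt_boxes[:num_gt_boxes])  # [[100. 100. 200. 200.]] -- padding dropped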
    metric_names = ['DetectionBoxes_Precision/mAP',
                    'DetectionBoxes_Precision/mAP@.50IOU',
                    'DetectionBoxes_Precision/mAP@.75IOU',
...@@ -583,5 +636,3 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
    eval_metric_ops[metric_name] = (tf.py_func(
        value_func_factory(metric_name), [], np.float32), update_op)
    return eval_metric_ops
...@@ -317,6 +317,230 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
    self.assertFalse(coco_evaluator._detection_boxes_list)
    self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self):
category_list = [{
'id': 0,
'name': 'person'
}, {
'id': 1,
'name': 'cat'
}, {
'id': 2,
'name': 'dog'
}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
image_id = tf.placeholder(tf.string, shape=())
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
detection_boxes = tf.placeholder(tf.float32, shape=(None, 4))
detection_scores = tf.placeholder(tf.float32, shape=(None))
detection_classes = tf.placeholder(tf.float32, shape=(None))
eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
image_id, groundtruth_boxes, groundtruth_classes, detection_boxes,
detection_scores, detection_classes)
_, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
with self.test_session() as sess:
sess.run(
update_op,
feed_dict={
image_id:
'image1',
groundtruth_boxes:
np.array([[100., 100., 200., 200.], [-1, -1, -1, -1]]),
groundtruth_classes:
np.array([1, -1]),
detection_boxes:
np.array([[100., 100., 200., 200.], [0., 0., 0., 0.]]),
detection_scores:
np.array([.8, 0.]),
detection_classes:
np.array([1, -1])
})
sess.run(
update_op,
feed_dict={
image_id:
'image2',
groundtruth_boxes:
np.array([[50., 50., 100., 100.], [-1, -1, -1, -1]]),
groundtruth_classes:
np.array([3, -1]),
detection_boxes:
np.array([[50., 50., 100., 100.], [0., 0., 0., 0.]]),
detection_scores:
np.array([.7, 0.]),
detection_classes:
np.array([3, -1])
})
sess.run(
update_op,
feed_dict={
image_id:
'image3',
groundtruth_boxes:
np.array([[25., 25., 50., 50.], [10., 10., 15., 15.]]),
groundtruth_classes:
np.array([2, 2]),
detection_boxes:
np.array([[25., 25., 50., 50.], [10., 10., 15., 15.]]),
detection_scores:
np.array([.95, .9]),
detection_classes:
np.array([2, 2])
})
metrics = {}
      for key, (value_op, _) in eval_metric_ops.items():
metrics[key] = value_op
metrics = sess.run(metrics)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
category_list = [{'id': 0, 'name': 'person'},
{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
batch_size = 3
image_id = tf.placeholder(tf.string, shape=(batch_size))
groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
detection_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None))
detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
image_id, groundtruth_boxes,
groundtruth_classes,
detection_boxes,
detection_scores,
detection_classes)
_, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
with self.test_session() as sess:
sess.run(update_op,
feed_dict={
image_id: ['image1', 'image2', 'image3'],
groundtruth_boxes: np.array([[[100., 100., 200., 200.]],
[[50., 50., 100., 100.]],
[[25., 25., 50., 50.]]]),
groundtruth_classes: np.array([[1], [3], [2]]),
detection_boxes: np.array([[[100., 100., 200., 200.]],
[[50., 50., 100., 100.]],
[[25., 25., 50., 50.]]]),
detection_scores: np.array([[.8], [.7], [.9]]),
detection_classes: np.array([[1], [3], [2]])
})
metrics = {}
      for key, (value_op, _) in eval_metric_ops.items():
metrics[key] = value_op
metrics = sess.run(metrics)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsPaddedBatches(self):
category_list = [{'id': 0, 'name': 'person'},
{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'}]
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
batch_size = 3
image_id = tf.placeholder(tf.string, shape=(batch_size))
groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
num_gt_boxes_per_image = tf.placeholder(tf.int32, shape=(None))
detection_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None))
detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
num_det_boxes_per_image = tf.placeholder(tf.int32, shape=(None))
eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
image_id, groundtruth_boxes,
groundtruth_classes,
detection_boxes,
detection_scores,
detection_classes,
num_gt_boxes_per_image,
num_det_boxes_per_image)
_, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
with self.test_session() as sess:
sess.run(update_op,
feed_dict={
image_id: ['image1', 'image2', 'image3'],
groundtruth_boxes: np.array([[[100., 100., 200., 200.],
[-1, -1, -1, -1]],
[[50., 50., 100., 100.],
[-1, -1, -1, -1]],
[[25., 25., 50., 50.],
[10., 10., 15., 15.]]]),
groundtruth_classes: np.array([[1, -1], [3, -1], [2, 2]]),
num_gt_boxes_per_image: np.array([1, 1, 2]),
detection_boxes: np.array([[[100., 100., 200., 200.],
[0., 0., 0., 0.]],
[[50., 50., 100., 100.],
[0., 0., 0., 0.]],
[[25., 25., 50., 50.],
[10., 10., 15., 15.]]]),
detection_scores: np.array([[.8, 0.], [.7, 0.], [.95, .9]]),
detection_classes: np.array([[1, -1], [3, -1], [2, 2]]),
num_det_boxes_per_image: np.array([1, 1, 2]),
})
metrics = {}
      for key, (value_op, _) in eval_metric_ops.items():
metrics[key] = value_op
metrics = sess.run(metrics)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
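For comparison with the graph-mode tests above, a hedged sketch of driving the same evaluator directly in Python, using the add_single_* methods that update_op wraps (the evaluate() call follows the evaluator's public interface, but treat the snippet as illustrative):

import numpy as np
from object_detection.metrics import coco_evaluation

evaluator = coco_evaluation.CocoDetectionEvaluator([{'id': 1, 'name': 'cat'}])
evaluator.add_single_ground_truth_image_info(
    'image1', {'groundtruth_boxes': np.array([[100., 100., 200., 200.]]),
               'groundtruth_classes': np.array([1])})
evaluator.add_single_detected_image_info(
    'image1', {'detection_boxes': np.array([[100., 100., 200., 200.]]),
               'detection_scores': np.array([.8]),
               'detection_classes': np.array([1])})
metrics = evaluator.evaluate()  # dict of DetectionBoxes_* metrics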
class CocoMaskEvaluationTest(tf.test.TestCase):
...
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...@@ -36,6 +36,10 @@ flags.DEFINE_string(
    'hparams_overrides', None, 'Hyperparameter overrides, '
    'represented as a string containing comma-separated '
    'hparam_name=value pairs.')
flags.DEFINE_string(
    'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
    '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
    'writing resulting metrics to `model_dir`.')

FLAGS = flags.FLAGS
...@@ -59,17 +63,23 @@ def main(unused_argv):
  train_steps = train_and_eval_dict['train_steps']
  eval_steps = train_and_eval_dict['eval_steps']
  if FLAGS.checkpoint_dir:
    estimator.evaluate(eval_input_fn,
                       eval_steps,
                       checkpoint_path=tf.train.latest_checkpoint(
                           FLAGS.checkpoint_dir))
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fn,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_steps,
        eval_on_train_data=False)

    # Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])


if __name__ == '__main__':
...
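A note on the eval-only path added above: tf.train.latest_checkpoint reads the directory's checkpoint state file and returns the newest checkpoint prefix, or None when the directory holds none (the path below is a placeholder):

import tensorflow as tf

ckpt = tf.train.latest_checkpoint('/tmp/train_dir')
# e.g. '/tmp/train_dir/model.ckpt-20000', or None if nothing has been saved;
# passing checkpoint_path=None makes estimator.evaluate fall back to the
# latest checkpoint in model_dir.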
...@@ -56,7 +56,9 @@ flags.DEFINE_integer('iterations_per_loop', 100,
# recent checkpoint every 10 minutes by default for train_and_eval
flags.DEFINE_string('mode', 'train',
                    'Mode to run: train, eval')
flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
                     'this is not provided, batch size is read from training '
                     'config.')
flags.DEFINE_string(
    'hparams_overrides', None, 'Comma-separated list of '
...@@ -93,6 +95,10 @@ def main(unused_argv):
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))

  kwargs = {}
  if FLAGS.train_batch_size:
    kwargs['batch_size'] = FLAGS.train_batch_size

  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
...@@ -102,7 +108,7 @@ def main(unused_argv):
      use_tpu_estimator=True,
      use_tpu=FLAGS.use_tpu,
      num_shards=FLAGS.num_shards,
      **kwargs)
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fn = train_and_eval_dict['eval_input_fn']
...
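The kwargs indirection above is the usual way to make a flag optional without clobbering a config default: the batch_size key is only present when the flag is set, so create_estimator_and_inputs otherwise falls back to the batch size in the training config. A generic, self-contained illustration (names are made up):

def create_inputs(batch_size=64):  # 64 stands in for the config value
  return batch_size

flag_value = None  # e.g. FLAGS.train_batch_size left unset
kwargs = {}
if flag_value:
  kwargs['batch_size'] = flag_value
print(create_inputs(**kwargs))  # 64 -- the config default wins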