Unverified commit 31ae57eb, authored by pkulzc and committed by GitHub

Minor fixes for object detection (#5613)

* Internal change.

PiperOrigin-RevId: 213914693

* Add an original_image_spatial_shape tensor to the input dictionary to store the shape of the original input image

PiperOrigin-RevId: 214018767

* Remove "groundtruth_confidences" from decoders use "groundtruth_weights" to indicate label confidence.

This also fixes a bug that only surfaced now: the random crop routines in core/preprocessor.py did not correctly handle "groundtruth_weights" tensors returned by the decoders.

PiperOrigin-RevId: 214091843

* Update CocoMaskEvaluator to allow for a batch of image info, rather than a single image.

PiperOrigin-RevId: 214295305

* Add the option to summarize gradients.

PiperOrigin-RevId: 214310875

* Adds FasterRCNN inference on CPU

1. Adds a flag use_static_shapes_for_eval to restrict execution to ops that guarantee static shapes.
2. Skips filtering of overlapping anchors when clipping anchors with use_static_shapes_for_eval set to True.
3. A...
parent 0b0c9cfd
@@ -15,6 +15,7 @@
 """Tests for ssd resnet v1 feature extractors."""
 import abc
 import numpy as np
+import tensorflow as tf
 from object_detection.models import ssd_feature_extractor_test
@@ -64,12 +65,15 @@ class SSDResnetPpnFeatureExtractorTestBase(
     image_width = 128
     depth_multiplier = 1
     pad_to_multiple = 1
-    test_image = np.random.rand(4, image_height, image_width, 3)
+    test_image = tf.constant(np.random.rand(4, image_height, image_width, 3))
     feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                        pad_to_multiple)
     preprocessed_image = feature_extractor.preprocess(test_image)
-    self.assertAllClose(preprocessed_image,
-                        test_image - [[123.68, 116.779, 103.939]])
+    with self.test_session() as sess:
+      test_image_out, preprocessed_image_out = sess.run(
+          [test_image, preprocessed_image])
+      self.assertAllClose(preprocessed_image_out,
+                          test_image_out - [[123.68, 116.779, 103.939]])

   def test_variables_only_created_in_scope(self):
     depth_multiplier = 1
...
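The test now evaluates both tensors in a session rather than comparing a numpy array against a graph tensor, since `preprocess` receives a `tf.Tensor`. A minimal standalone sketch of the same check, assuming TF 1.x session semantics and the Resnet channel means asserted above:

```python
import numpy as np
import tensorflow as tf

# Build the comparison the updated test performs: both tensors are run in a
# session before asserting, because preprocess() now returns a graph tensor.
image = tf.constant(np.random.rand(4, 128, 128, 3), dtype=tf.float32)
preprocessed = image - [[123.68, 116.779, 103.939]]  # Resnet channel means

with tf.Session() as sess:
  image_out, preprocessed_out = sess.run([image, preprocessed])
  np.testing.assert_allclose(
      preprocessed_out, image_out - [[123.68, 116.779, 103.939]], rtol=1e-5)
```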
@@ -134,26 +134,32 @@ class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
           (len(self._prediction_heads[BOX_ENCODINGS]),
            len(input_shapes)))
     for stack_index, input_shape in enumerate(input_shapes):
-      net = tf.keras.Sequential(name='PreHeadConvolutions_%d' % stack_index)
-      self._shared_nets.append(net)
+      net = []

       # Add additional conv layers before the class predictor.
       features_depth = static_shape.get_depth(input_shape)
       depth = max(min(features_depth, self._max_depth), self._min_depth)
       tf.logging.info(
           'depth of additional conv before box predictor: {}'.format(depth))

       if depth > 0 and self._num_layers_before_predictor > 0:
         for i in range(self._num_layers_before_predictor):
-          net.add(keras.Conv2D(depth, [1, 1],
-                               name='Conv2d_%d_1x1_%d' % (i, depth),
-                               padding='SAME',
-                               **self._conv_hyperparams.params()))
-          net.add(self._conv_hyperparams.build_batch_norm(
-              training=(self._is_training and not self._freeze_batchnorm),
-              name='Conv2d_%d_1x1_%d_norm' % (i, depth)))
-          net.add(self._conv_hyperparams.build_activation_layer(
-              name='Conv2d_%d_1x1_%d_activation' % (i, depth),
-          ))
+          net.append(keras.Conv2D(depth, [1, 1],
+                                  name='SharedConvolutions_%d/Conv2d_%d_1x1_%d'
+                                  % (stack_index, i, depth),
+                                  padding='SAME',
+                                  **self._conv_hyperparams.params()))
+          net.append(self._conv_hyperparams.build_batch_norm(
+              training=(self._is_training and not self._freeze_batchnorm),
+              name='SharedConvolutions_%d/Conv2d_%d_1x1_%d_norm'
+              % (stack_index, i, depth)))
+          net.append(self._conv_hyperparams.build_activation_layer(
+              name='SharedConvolutions_%d/Conv2d_%d_1x1_%d_activation'
+              % (stack_index, i, depth),
+          ))
+
+      # Until certain bugs are fixed in checkpointable lists,
+      # this net must be appended only once it's been filled with layers.
+      self._shared_nets.append(net)

     self.built = True

   def _predict(self, image_features):
@@ -175,10 +181,11 @@ class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
     """
     predictions = collections.defaultdict(list)
-    for (index, image_feature) in enumerate(image_features):
+    for (index, net) in enumerate(image_features):
       # Apply shared conv layers before the head predictors.
-      net = self._shared_nets[index](image_feature)
+      for layer in self._shared_nets[index]:
+        net = layer(net)
       for head_name in self._prediction_heads:
         head_obj = self._prediction_heads[head_name][index]
...
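The `tf.keras.Sequential` container is replaced by a plain Python list of layers applied in order, which sidesteps the checkpointable-list issue noted in the diff. A minimal sketch of the pattern, with an illustrative layer stack rather than the predictor's real hyperparameters:

```python
import tensorflow as tf

# A plain list stands in for tf.keras.Sequential; for a linear stack,
# applying the layers in a loop is equivalent.
shared_layers = [
    tf.keras.layers.Conv2D(32, [1, 1], padding='SAME', name='Conv2d_0_1x1_32'),
    tf.keras.layers.BatchNormalization(name='Conv2d_0_1x1_32_norm'),
    tf.keras.layers.Activation('relu', name='Conv2d_0_1x1_32_activation'),
]

def apply_shared_layers(features):
  net = features
  for layer in shared_layers:
    net = layer(net)
  return net
```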
@@ -181,8 +181,8 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
     self.assertAllEqual(objectness_predictions_shape,
                         [4, expected_num_anchors, 1])
     expected_variable_set = set([
-        'BoxPredictor/PreHeadConvolutions_0/Conv2d_0_1x1_32/bias',
-        'BoxPredictor/PreHeadConvolutions_0/Conv2d_0_1x1_32/kernel',
+        'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias',
+        'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/kernel',
         'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/bias',
         'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/kernel',
         'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/bias',
...
@@ -34,16 +34,18 @@ class MaskRCNNClassHead(head.Head):
   https://arxiv.org/abs/1703.06870
   """

-  def __init__(self, is_training, num_classes, fc_hyperparams_fn,
-               use_dropout, dropout_keep_prob):
+  def __init__(self,
+               is_training,
+               num_class_slots,
+               fc_hyperparams_fn,
+               use_dropout,
+               dropout_keep_prob):
     """Constructor.

     Args:
       is_training: Indicates whether the BoxPredictor is in training mode.
-      num_classes: number of classes.  Note that num_classes *does not*
-        include the background category, so if groundtruth labels take values
-        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
-        assigned classification targets can range from {0,... K}).
+      num_class_slots: number of class slots. Note that num_class_slots may or
+        may not include an implicit background category.
       fc_hyperparams_fn: A function to generate tf-slim arg_scope with
         hyperparameters for fully connected ops.
       use_dropout: Option to use dropout or not.  Note that a single dropout
@@ -54,7 +56,7 @@ class MaskRCNNClassHead(head.Head):
     """
     super(MaskRCNNClassHead, self).__init__()
     self._is_training = is_training
-    self._num_classes = num_classes
+    self._num_class_slots = num_class_slots
     self._fc_hyperparams_fn = fc_hyperparams_fn
     self._use_dropout = use_dropout
     self._dropout_keep_prob = dropout_keep_prob
@@ -70,7 +72,7 @@ class MaskRCNNClassHead(head.Head):
     Returns:
       class_predictions_with_background: A float tensor of shape
-        [batch_size, 1, num_classes + 1] representing the class predictions for
+        [batch_size, 1, num_class_slots] representing the class predictions for
         the proposals.

     Raises:
@@ -91,11 +93,12 @@ class MaskRCNNClassHead(head.Head):
     with slim.arg_scope(self._fc_hyperparams_fn()):
       class_predictions_with_background = slim.fully_connected(
           flattened_roi_pooled_features,
-          self._num_classes + 1,
+          self._num_class_slots,
           activation_fn=None,
           scope='ClassPredictor')
     class_predictions_with_background = tf.reshape(
-        class_predictions_with_background, [-1, 1, self._num_classes + 1])
+        class_predictions_with_background,
+        [-1, 1, self._num_class_slots])
     return class_predictions_with_background
@@ -104,7 +107,7 @@ class ConvolutionalClassHead(head.Head):

   def __init__(self,
                is_training,
-               num_classes,
+               num_class_slots,
                use_dropout,
                dropout_keep_prob,
                kernel_size,
@@ -115,7 +118,8 @@ class ConvolutionalClassHead(head.Head):
     Args:
       is_training: Indicates whether the BoxPredictor is in training mode.
-      num_classes: Number of classes.
+      num_class_slots: number of class slots. Note that num_class_slots may or
+        may not include an implicit background category.
       use_dropout: Option to use dropout or not.  Note that a single dropout
         op is applied here prior to both box and class predictions, which stands
         in contrast to the ConvolutionalBoxPredictor below.
@@ -137,7 +141,7 @@ class ConvolutionalClassHead(head.Head):
     """
     super(ConvolutionalClassHead, self).__init__()
     self._is_training = is_training
-    self._num_classes = num_classes
+    self._num_class_slots = num_class_slots
     self._use_dropout = use_dropout
     self._dropout_keep_prob = dropout_keep_prob
     self._kernel_size = kernel_size
@@ -156,12 +160,10 @@ class ConvolutionalClassHead(head.Head):
     Returns:
       class_predictions_with_background: A float tensors of shape
-        [batch_size, num_anchors, num_classes + 1] representing the class
+        [batch_size, num_anchors, num_class_slots] representing the class
         predictions for the proposals.
     """
     net = features
-    # Add a slot for the background class.
-    num_class_slots = self._num_classes + 1
     if self._use_dropout:
       net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
     if self._use_depthwise:
@@ -171,7 +173,7 @@ class ConvolutionalClassHead(head.Head):
           rate=1, scope='ClassPredictor_depthwise')
       class_predictions_with_background = slim.conv2d(
           class_predictions_with_background,
-          num_predictions_per_location * num_class_slots, [1, 1],
+          num_predictions_per_location * self._num_class_slots, [1, 1],
           activation_fn=None,
           normalizer_fn=None,
           normalizer_params=None,
@@ -179,7 +181,7 @@ class ConvolutionalClassHead(head.Head):
     else:
       class_predictions_with_background = slim.conv2d(
           net,
-          num_predictions_per_location * num_class_slots,
+          num_predictions_per_location * self._num_class_slots,
           [self._kernel_size, self._kernel_size],
           activation_fn=None,
           normalizer_fn=None,
@@ -194,7 +196,8 @@ class ConvolutionalClassHead(head.Head):
     if batch_size is None:
       batch_size = tf.shape(features)[0]
     class_predictions_with_background = tf.reshape(
-        class_predictions_with_background, [batch_size, -1, num_class_slots])
+        class_predictions_with_background,
+        [batch_size, -1, self._num_class_slots])
     return class_predictions_with_background
@@ -208,7 +211,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
   """

   def __init__(self,
-               num_classes,
+               num_class_slots,
                kernel_size=3,
                class_prediction_bias_init=0.0,
                use_dropout=False,
@@ -218,10 +221,8 @@ class WeightSharedConvolutionalClassHead(head.Head):
     """Constructor.

     Args:
-      num_classes: number of classes.  Note that num_classes *does not*
-        include the background category, so if groundtruth labels take values
-        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
-        assigned classification targets can range from {0,... K}).
+      num_class_slots: number of class slots. Note that num_class_slots may or
+        may not include an implicit background category.
       kernel_size: Size of final convolution kernel.
       class_prediction_bias_init: constant value to initialize bias of the last
         conv2d layer before class prediction.
@@ -233,7 +234,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
         as inputs and returns tensors).
     """
     super(WeightSharedConvolutionalClassHead, self).__init__()
-    self._num_classes = num_classes
+    self._num_class_slots = num_class_slots
     self._kernel_size = kernel_size
     self._class_prediction_bias_init = class_prediction_bias_init
     self._use_dropout = use_dropout
@@ -252,12 +253,10 @@ class WeightSharedConvolutionalClassHead(head.Head):
     Returns:
       class_predictions_with_background: A tensor of shape
-        [batch_size, num_anchors, num_classes + 1] representing the class
+        [batch_size, num_anchors, num_class_slots] representing the class
         predictions for the proposals.
     """
     class_predictions_net = features
-    num_class_slots = self._num_classes + 1
-    # Add a slot for the background class.
     if self._use_dropout:
       class_predictions_net = slim.dropout(
           class_predictions_net, keep_prob=self._dropout_keep_prob)
@@ -267,7 +266,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
       conv_op = slim.conv2d
     class_predictions_with_background = conv_op(
         class_predictions_net,
-        num_predictions_per_location * num_class_slots,
+        num_predictions_per_location * self._num_class_slots,
         [self._kernel_size, self._kernel_size],
         activation_fn=None, stride=1, padding='SAME',
         normalizer_fn=None,
@@ -280,5 +279,6 @@ class WeightSharedConvolutionalClassHead(head.Head):
     class_predictions_with_background = self._score_converter_fn(
         class_predictions_with_background)
     class_predictions_with_background = tf.reshape(
-        class_predictions_with_background, [batch_size, -1, num_class_slots])
+        class_predictions_with_background,
+        [batch_size, -1, self._num_class_slots])
     return class_predictions_with_background
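The heads above now take the slot count directly instead of a class count that always gained a background slot. A short sketch of how a caller derives it, assuming the usual convention that background, when present, occupies one extra slot:

```python
num_classes = 20
add_background_class = True  # mirrors the new add_background_class field

# With an implicit background category the head needs num_classes + 1 slots;
# without one (e.g. when using multiclass scores), exactly num_classes.
num_class_slots = num_classes + 1 if add_background_class else num_classes
```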
@@ -46,7 +46,7 @@ class MaskRCNNClassHeadTest(test_case.TestCase):
   def test_prediction_size(self):
     class_prediction_head = class_head.MaskRCNNClassHead(
         is_training=False,
-        num_classes=20,
+        num_class_slots=20,
         fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
         use_dropout=True,
         dropout_keep_prob=0.5)
@@ -54,7 +54,7 @@ class MaskRCNNClassHeadTest(test_case.TestCase):
         [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
     prediction = class_prediction_head.predict(
         features=roi_pooled_features, num_predictions_per_location=1)
-    self.assertAllEqual([64, 1, 21], prediction.get_shape().as_list())
+    self.assertAllEqual([64, 1, 20], prediction.get_shape().as_list())

 class ConvolutionalClassPredictorTest(test_case.TestCase):
@@ -80,7 +80,7 @@ class ConvolutionalClassPredictorTest(test_case.TestCase):
   def test_prediction_size(self):
     class_prediction_head = class_head.ConvolutionalClassHead(
         is_training=True,
-        num_classes=20,
+        num_class_slots=20,
         use_dropout=True,
         dropout_keep_prob=0.5,
         kernel_size=3)
@@ -89,7 +89,7 @@ class ConvolutionalClassPredictorTest(test_case.TestCase):
     class_predictions = class_prediction_head.predict(
         features=image_feature,
         num_predictions_per_location=1)
-    self.assertAllEqual([64, 323, 21],
+    self.assertAllEqual([64, 323, 20],
                         class_predictions.get_shape().as_list())
@@ -115,13 +115,13 @@ class WeightSharedConvolutionalClassPredictorTest(test_case.TestCase):
   def test_prediction_size(self):
     class_prediction_head = (
-        class_head.WeightSharedConvolutionalClassHead(num_classes=20))
+        class_head.WeightSharedConvolutionalClassHead(num_class_slots=20))
     image_feature = tf.random_uniform(
         [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
     class_predictions = class_prediction_head.predict(
         features=image_feature,
         num_predictions_per_location=1)
-    self.assertAllEqual([64, 323, 21], class_predictions.get_shape().as_list())
+    self.assertAllEqual([64, 323, 20], class_predictions.get_shape().as_list())

 if __name__ == '__main__':
...
@@ -91,7 +91,7 @@ class ConvolutionalBoxHead(head.KerasHead):
           tf.keras.layers.Conv2D(
               num_predictions_per_location * self._box_code_size, [1, 1],
               name='BoxEncodingPredictor',
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))
     else:
       self._box_encoder_layers.append(
           tf.keras.layers.Conv2D(
@@ -99,7 +99,7 @@ class ConvolutionalBoxHead(head.KerasHead):
               [self._kernel_size, self._kernel_size],
               padding='SAME',
               name='BoxEncodingPredictor',
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))

   def _predict(self, features):
     """Predicts boxes.
...
@@ -29,7 +29,7 @@ class ConvolutionalClassHead(head.KerasHead):

   def __init__(self,
                is_training,
-               num_classes,
+               num_class_slots,
                use_dropout,
                dropout_keep_prob,
                kernel_size,
@@ -43,7 +43,8 @@ class ConvolutionalClassHead(head.KerasHead):
     Args:
       is_training: Indicates whether the BoxPredictor is in training mode.
-      num_classes: Number of classes.
+      num_class_slots: number of class slots. Note that num_class_slots may or
+        may not include an implicit background category.
       use_dropout: Option to use dropout or not.  Note that a single dropout
         op is applied here prior to both box and class predictions, which stands
         in contrast to the ConvolutionalBoxPredictor below.
@@ -73,13 +74,12 @@ class ConvolutionalClassHead(head.KerasHead):
     """
     super(ConvolutionalClassHead, self).__init__(name=name)
     self._is_training = is_training
-    self._num_classes = num_classes
     self._use_dropout = use_dropout
     self._dropout_keep_prob = dropout_keep_prob
     self._kernel_size = kernel_size
     self._class_prediction_bias_init = class_prediction_bias_init
     self._use_depthwise = use_depthwise
-    self._num_class_slots = self._num_classes + 1
+    self._num_class_slots = num_class_slots
     self._class_predictor_layers = []
@@ -110,7 +110,7 @@ class ConvolutionalClassHead(head.KerasHead):
           tf.keras.layers.Conv2D(
               num_predictions_per_location * self._num_class_slots, [1, 1],
               name='ClassPredictor',
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))
     else:
       self._class_predictor_layers.append(
           tf.keras.layers.Conv2D(
@@ -120,7 +120,7 @@ class ConvolutionalClassHead(head.KerasHead):
               name='ClassPredictor',
               bias_initializer=tf.constant_initializer(
                   self._class_prediction_bias_init),
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))

   def _predict(self, features):
     """Predicts boxes.
@@ -131,7 +131,7 @@ class ConvolutionalClassHead(head.KerasHead):
     Returns:
       class_predictions_with_background: A float tensor of shape
-        [batch_size, num_anchors, num_classes + 1] representing the class
+        [batch_size, num_anchors, num_class_slots] representing the class
         predictions for the proposals.
     """
     # Add a slot for the background class.
...
@@ -45,7 +45,7 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
     conv_hyperparams = self._build_conv_hyperparams()
     class_prediction_head = keras_class_head.ConvolutionalClassHead(
         is_training=True,
-        num_classes=20,
+        num_class_slots=20,
         use_dropout=True,
         dropout_keep_prob=0.5,
         kernel_size=3,
@@ -56,7 +56,7 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
     image_feature = tf.random_uniform(
         [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
     class_predictions = class_prediction_head(image_feature,)
-    self.assertAllEqual([64, 323, 21],
+    self.assertAllEqual([64, 323, 20],
                         class_predictions.get_shape().as_list())

 # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
...
@@ -124,7 +124,7 @@ class ConvolutionalMaskHead(head.KerasHead):
           tf.keras.layers.Conv2D(
               num_predictions_per_location * num_mask_channels, [1, 1],
               name='MaskPredictor',
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))
     else:
       self._mask_predictor_layers.append(
           tf.keras.layers.Conv2D(
@@ -132,7 +132,7 @@ class ConvolutionalMaskHead(head.KerasHead):
               [self._kernel_size, self._kernel_size],
               padding='SAME',
               name='MaskPredictor',
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))

   def _predict(self, features):
     """Predicts boxes.
...
@@ -23,6 +23,7 @@ import math
 import tensorflow as tf

 from object_detection.predictors.heads import head
+from object_detection.utils import ops

 slim = tf.contrib.slim
@@ -41,7 +42,8 @@ class MaskRCNNMaskHead(head.Head):
                mask_width=14,
                mask_prediction_num_conv_layers=2,
                mask_prediction_conv_depth=256,
-               masks_are_class_agnostic=False):
+               masks_are_class_agnostic=False,
+               convolve_then_upsample=False):
     """Constructor.

     Args:
@@ -62,6 +64,10 @@ class MaskRCNNMaskHead(head.Head):
         image features.
       masks_are_class_agnostic: Boolean determining if the mask-head is
         class-agnostic or not.
+      convolve_then_upsample: Whether to apply convolutions on mask features
+        before upsampling using nearest neighbor resizing. Otherwise, mask
+        features are resized to [`mask_height`, `mask_width`] using bilinear
+        resizing before applying convolutions.

     Raises:
       ValueError: conv_hyperparams_fn is None.
@@ -74,6 +80,7 @@ class MaskRCNNMaskHead(head.Head):
     self._mask_prediction_num_conv_layers = mask_prediction_num_conv_layers
     self._mask_prediction_conv_depth = mask_prediction_conv_depth
     self._masks_are_class_agnostic = masks_are_class_agnostic
+    self._convolve_then_upsample = convolve_then_upsample
     if conv_hyperparams_fn is None:
       raise ValueError('conv_hyperparams_fn is None.')
@@ -135,17 +142,30 @@ class MaskRCNNMaskHead(head.Head):
     num_conv_channels = self._get_mask_predictor_conv_depth(
         num_feature_channels, self._num_classes)
     with slim.arg_scope(self._conv_hyperparams_fn()):
-      upsampled_features = tf.image.resize_bilinear(
-          features, [self._mask_height, self._mask_width],
-          align_corners=True)
+      if not self._convolve_then_upsample:
+        features = tf.image.resize_bilinear(
+            features, [self._mask_height, self._mask_width],
+            align_corners=True)
       for _ in range(self._mask_prediction_num_conv_layers - 1):
-        upsampled_features = slim.conv2d(
-            upsampled_features,
+        features = slim.conv2d(
+            features,
+            num_outputs=num_conv_channels,
+            kernel_size=[3, 3])
+      if self._convolve_then_upsample:
+        # Replace Transposed Convolution with a Nearest Neighbor upsampling step
+        # followed by 3x3 convolution.
+        height_scale = self._mask_height / features.shape[1].value
+        width_scale = self._mask_width / features.shape[2].value
+        features = ops.nearest_neighbor_upsampling(
+            features, height_scale=height_scale, width_scale=width_scale)
+        features = slim.conv2d(
+            features,
            num_outputs=num_conv_channels,
            kernel_size=[3, 3])
       num_masks = 1 if self._masks_are_class_agnostic else self._num_classes
       mask_predictions = slim.conv2d(
-          upsampled_features,
+          features,
           num_outputs=num_masks,
           activation_fn=None,
           normalizer_fn=None,
...
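The convolve_then_upsample path relies on `ops.nearest_neighbor_upsampling`. A self-contained sketch of integer-factor nearest neighbor upsampling (the utility itself takes separate height and width scales; a single scale is assumed here):

```python
import tensorflow as tf

def nearest_neighbor_upsample(x, scale):
  """Replicates each pixel `scale` times along height and width."""
  _, height, width, channels = x.shape.as_list()
  # Insert singleton axes next to height and width, tile, then collapse.
  x = tf.reshape(x, [-1, height, 1, width, 1, channels])
  x = tf.tile(x, [1, 1, scale, 1, scale, 1])
  return tf.reshape(x, [-1, height * scale, width * scale, channels])

# E.g. a 14x14 mask feature map upsampled to 28x28 before the final 3x3 conv,
# matching the shapes exercised by the new test above.
features = tf.zeros([64, 14, 14, 256])
upsampled = nearest_neighbor_upsample(features, scale=2)  # [64, 28, 28, 256]
```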
@@ -58,6 +58,22 @@ class MaskRCNNMaskHeadTest(test_case.TestCase):
         features=roi_pooled_features, num_predictions_per_location=1)
     self.assertAllEqual([64, 1, 20, 14, 14], prediction.get_shape().as_list())

+  def test_prediction_size_with_convolve_then_upsample(self):
+    mask_prediction_head = mask_head.MaskRCNNMaskHead(
+        num_classes=20,
+        conv_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
+        mask_height=28,
+        mask_width=28,
+        mask_prediction_num_conv_layers=2,
+        mask_prediction_conv_depth=256,
+        masks_are_class_agnostic=True,
+        convolve_then_upsample=True)
+    roi_pooled_features = tf.random_uniform(
+        [64, 14, 14, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+    prediction = mask_prediction_head.predict(
+        features=roi_pooled_features, num_predictions_per_location=1)
+    self.assertAllEqual([64, 1, 1, 28, 28], prediction.get_shape().as_list())

 class ConvolutionalMaskPredictorTest(test_case.TestCase):
...
@@ -138,6 +138,7 @@ message WeightSharedConvolutionalBoxPredictor {

 // TODO(alirezafathi): Refactor the proto file to be able to configure mask rcnn
 // head easily.
+// Next id: 15
 message MaskRCNNBoxPredictor {
   // Hyperparameters for fully connected ops used in the box predictor.
   optional Hyperparams fc_hyperparams = 1;
@@ -178,6 +179,12 @@ message MaskRCNNBoxPredictor {
   // Whether to use one box for all classes rather than a different box for each
   // class.
   optional bool share_box_across_classes = 13 [default = false];
+
+  // Whether to apply convolutions on mask features before upsampling using
+  // nearest neighbor resizing.
+  // By default, mask features are resized to [`mask_height`, `mask_width`]
+  // before applying convolutions and predicting masks.
+  optional bool convolve_then_upsample_masks = 14 [default = false];
 }

 message RfcnBoxPredictor {
...
@@ -164,6 +164,10 @@ message FasterRcnn {
   // Whether the masks present in groundtruth should be resized in the model to
   // match the image size.
   optional bool resize_masks = 36 [default = true];
+
+  // If True, uses implementation of ops with static shape guarantees when
+  // running evaluation (specifically not is_training if False).
+  optional bool use_static_shapes_for_eval = 37 [default = false];
 }
...
@@ -155,6 +155,9 @@ message RandomCropImage {
   // value, it is removed from the new image.
   optional float overlap_thresh = 6 [default=0.3];

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 8 [default=true];
+
   // Probability of keeping the original image.
   optional float random_coef = 7 [default=0.0];
 }
@@ -194,6 +197,9 @@ message RandomCropPadImage {
   // value, it is removed from the new image.
   optional float overlap_thresh = 6 [default=0.3];

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 11 [default=true];
+
   // Probability of keeping the original image during the crop operation.
   optional float random_coef = 7 [default=0.0];
@@ -217,6 +223,9 @@ message RandomCropToAspectRatio {
   // ratio between a cropped bounding box and the original is less than this
   // value, it is removed from the new image.
   optional float overlap_thresh = 2 [default=0.3];
+
+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 3 [default=true];
 }

 // Randomly adds black square patches to an image.
@@ -285,6 +294,9 @@ message SSDRandomCropOperation {
   // Cropped box area ratio must be above this threshold to be kept.
   optional float overlap_thresh = 6;

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 8 [default=true];
+
   // Probability a crop operation is skipped.
   optional float random_coef = 7;
 }
@@ -315,6 +327,9 @@ message SSDRandomCropPadOperation {
   // Cropped box area ratio must be above this threshold to be kept.
   optional float overlap_thresh = 6;

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 13 [default=true];
+
   // Probability a crop operation is skipped.
   optional float random_coef = 7;
@@ -353,6 +368,9 @@ message SSDRandomCropFixedAspectRatioOperation {
   // Cropped box area ratio must be above this threshold to be kept.
   optional float overlap_thresh = 6;

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 8 [default=true];
+
   // Probability a crop operation is skipped.
   optional float random_coef = 7;
 }
@@ -387,6 +405,9 @@ message SSDRandomCropPadFixedAspectRatioOperation {
   // Cropped box area ratio must be above this threshold to be kept.
   optional float overlap_thresh = 6;

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 8 [default=true];
+
   // Probability a crop operation is skipped.
   optional float random_coef = 7;
 }
...
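Every crop operation gains the same clip_boxes switch. Conceptually, clipping truncates boxes (in normalized coordinates) to the crop window instead of leaving them partially outside it; a hedged sketch of the idea, not the preprocessor's actual implementation:

```python
import tensorflow as tf

def clip_boxes_to_crop(boxes):
  # boxes: [N, 4] tensor of [ymin, xmin, ymax, xmax] in normalized
  # coordinates relative to the cropped image; clipping keeps them
  # inside the crop window.
  return tf.clip_by_value(boxes, 0.0, 1.0)
```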
@@ -12,7 +12,7 @@ import "object_detection/protos/post_processing.proto";
 import "object_detection/protos/region_similarity_calculator.proto";

 // Configuration for Single Shot Detection (SSD) models.
-// Next id: 21
+// Next id: 22
 message Ssd {

   // Number of classes to predict.
@@ -92,11 +92,17 @@ message Ssd {
   // Minimum number of effective negative samples.
   // Only applies if use_expected_classification_loss_under_sampling is true.
-  optional float minimum_negative_sampling = 19 [default=0];
+  optional float min_num_negative_samples = 19 [default=0];

   // Desired number of effective negative samples per positive sample.
   // Only applies if use_expected_classification_loss_under_sampling is true.
   optional float desired_negative_sampling_ratio = 20 [default=3];
+
+  // Whether to add an implicit background class to one-hot encodings of
+  // groundtruth labels. Set to false if using groundtruth labels with an
+  // explicit background class, using multiclass scores, or if training a
+  // single class model.
+  optional bool add_background_class = 21 [default = true];
 }
...
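The new add_background_class field controls whether a background slot is prepended to the one-hot groundtruth. A minimal sketch of the two encodings:

```python
import tensorflow as tf

labels = tf.constant([2, 0, 1])  # class indices, no background
one_hot = tf.one_hot(labels, 3)  # used as-is when add_background_class=false

# With add_background_class=true, a zero background column is prepended so
# groundtruth one-hots line up with num_classes + 1 prediction slots.
with_background = tf.pad(one_hot, [[0, 0], [1, 0]])  # slot 0 = background
```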
@@ -6,7 +6,7 @@ import "object_detection/protos/optimizer.proto";
 import "object_detection/protos/preprocessor.proto";

 // Message for configuring DetectionModel training jobs (train.py).
-// Next id: 27
+// Next id: 28
 message TrainConfig {
   // Effective batch size to use for training.
   // For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be
@@ -115,4 +115,7 @@ message TrainConfig {

   // Whether to use bfloat16 for training.
   optional bool use_bfloat16 = 26 [default=false];
+
+  // Whether to summarize gradients.
+  optional bool summarize_gradients = 27 [default=false];
 }
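TrainConfig only defines the flag; a plausible wiring (assumed here, since the training-loop change is not shown in this diff) is slim's create_train_op, which already accepts a summarize_gradients argument:

```python
import tensorflow as tf

slim = tf.contrib.slim

# Toy loss and optimizer so the snippet is self-contained; in the detection
# pipeline these come from the model and the optimizer config.
weight = tf.Variable(1.0)
total_loss = tf.reduce_sum(tf.square(weight))
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.004)

# summarize_gradients=True adds a histogram summary for each gradient.
train_op = slim.learning.create_train_op(
    total_loss, optimizer, summarize_gradients=True)
```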
# Quantized trained SSD with Mobilenet v2 on Open Images v4.
# Non-face boxes are dropped during training and non-face groundtruth boxes are
# ignored when evaluating.
#
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
num_classes: 1
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
feature_extractor {
type: "ssd_mobilenet_v2"
depth_multiplier: 1.0
min_depth: 16
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 4.0e-05
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.03
}
}
activation: RELU_6
batch_norm {
decay: 0.9997
center: true
scale: true
epsilon: 0.001
train: true
}
}
pad_to_multiple: 32
use_explicit_padding: true
}
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 4.0e-05
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.03
}
}
activation: RELU_6
batch_norm {
decay: 0.9997
center: true
scale: true
epsilon: 0.001
train: true
}
}
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
kernel_size: 3
box_code_size: 4
apply_sigmoid_to_scores: false
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
height_stride: 16
height_stride: 32
height_stride: 64
height_stride: 128
height_stride: 256
height_stride: 512
width_stride: 16
width_stride: 32
width_stride: 64
width_stride: 128
width_stride: 256
width_stride: 512
}
}
post_processing {
batch_non_max_suppression {
score_threshold: 1.0e-08
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
normalize_loss_by_num_matches: true
loss {
localization_loss {
weighted_smooth_l1 {
}
}
classification_loss {
weighted_sigmoid {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 10
}
classification_weight: 1.0
localization_weight: 1.0
}
}
}
train_config {
batch_size: 32
data_augmentation_options {
random_horizontal_flip {
keypoint_flip_permutation: 1
keypoint_flip_permutation: 0
keypoint_flip_permutation: 2
keypoint_flip_permutation: 3
keypoint_flip_permutation: 5
keypoint_flip_permutation: 4
}
}
data_augmentation_options {
ssd_random_crop_fixed_aspect_ratio {
}
}
optimizer {
rms_prop_optimizer {
learning_rate {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
fine_tune_checkpoint: ""
}
train_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/face_label_map.pbtxt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/face_train.record-?????-of-00100"
}
}
eval_config {
metrics_set: "coco_detection_metrics"
use_moving_averages: true
}
eval_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/face_label_map.pbtxt"
shuffle: false
num_readers: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/face_val.record-?????-of-00010"
}
}
graph_rewriter {
quantization {
delay: 500000
weight_bits: 8
activation_bits: 8
}
}
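The graph_rewriter block above corresponds to tf.contrib.quantize's delayed fake quantization. A hedged sketch of the rewrite applied directly (the detection binaries perform this from the config; the tiny graph here is illustrative):

```python
import tensorflow as tf

g = tf.Graph()
with g.as_default():
  images = tf.placeholder(tf.float32, [1, 320, 320, 3])
  net = tf.layers.conv2d(images, 8, 3, activation=tf.nn.relu6)
  # Inserts fake-quant ops (8-bit by default) that activate after
  # `quant_delay` steps (500000 in the config above).
  tf.contrib.quantize.create_training_graph(input_graph=g, quant_delay=500000)
```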
# Quantized trained SSD with Mobilenet v2 on MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
fixed_shape_resizer {
height: 300
width: 300
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 1
box_code_size: 4
apply_sigmoid_to_scores: false
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'ssd_mobilenet_v2'
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 3
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 24
optimizer {
rms_prop_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
fine_tune_checkpoint_type: "detection"
# Note: The below line limits the training process to 200K steps, which we
# empirically found to be sufficient enough to train the pets dataset. This
# effectively bypasses the learning rate schedule (the learning rate will
# never decay). Remove the below line to train indefinitely.
num_steps: 200000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}
eval_config: {
num_examples: 8000
# Note: The below line limits the evaluation process to 10 evaluations.
# Remove the below line to evaluate indefinitely.
max_evals: 10
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false
num_readers: 1
}
graph_rewriter {
quantization {
delay: 48000
weight_bits: 8
activation_bits: 8
}
}
\ No newline at end of file
@@ -76,12 +76,14 @@ def get_spatial_image_size(image_resizer_config):
   raise ValueError("Unknown image resizer type.")

-def get_configs_from_pipeline_file(pipeline_config_path):
+def get_configs_from_pipeline_file(pipeline_config_path, config_override=None):
   """Reads config from a file containing pipeline_pb2.TrainEvalPipelineConfig.

   Args:
     pipeline_config_path: Path to pipeline_pb2.TrainEvalPipelineConfig text
       proto.
+    config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
+      override pipeline_config_path.

   Returns:
     Dictionary of configuration objects. Keys are `model`, `train_config`,
@@ -92,6 +94,8 @@ def get_configs_from_pipeline_file(pipeline_config_path):
   with tf.gfile.GFile(pipeline_config_path, "r") as f:
     proto_str = f.read()
     text_format.Merge(proto_str, pipeline_config)
+  if config_override:
+    text_format.Merge(config_override, pipeline_config)
   return create_configs_from_pipeline_proto(pipeline_config)
@@ -430,7 +434,7 @@ def merge_external_params_with_configs(configs, hparams=None, kwargs_dict=None):
       final learning rates.
       In this case key can be one of the following formats:
       1. legacy update: single string that indicates the attribute to be
-        updated. E.g. 'lable_map_path', 'eval_input_path', 'shuffle'.
+        updated. E.g. 'label_map_path', 'eval_input_path', 'shuffle'.
         Note that when updating fields (e.g. eval_input_path, eval_shuffle) in
         eval_input_configs, the override will only be applied when
         eval_input_configs has exactly 1 element.
...
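A usage sketch for the new config_override argument: the override text proto is merged on top of the file's contents, so only the listed fields change. The paths and field values here are illustrative:

```python
from object_detection.utils import config_util

# Merges the override after reading the file, mirroring the two
# text_format.Merge calls added above.
configs = config_util.get_configs_from_pipeline_file(
    'path/to/pipeline.config',
    config_override='train_config { batch_size: 8 }')
```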
@@ -633,11 +633,37 @@ class ObjectDetectionEvaluation(object):
                nms_max_output_boxes=10000,
                use_weighted_mean_ap=False,
                label_id_offset=0,
-               group_of_weight=0.0):
+               group_of_weight=0.0,
+               per_image_eval_class=per_image_evaluation.PerImageEvaluation):
+    """Constructor.
+
+    Args:
+      num_groundtruth_classes: Number of ground-truth classes.
+      matching_iou_threshold: IOU threshold used for matching detected boxes
+        to ground-truth boxes.
+      nms_iou_threshold: IOU threshold used for non-maximum suppression.
+      nms_max_output_boxes: Maximum number of boxes returned by non-maximum
+        suppression.
+      use_weighted_mean_ap: (optional) boolean which determines if the mean
+        average precision is computed directly from the scores and tp_fp_labels
+        of all classes.
+      label_id_offset: The label id offset.
+      group_of_weight: Weight of group-of boxes. If set to 0, detections of the
+        correct class within a group-of box are ignored. If weight is > 0, then
+        if at least one detection falls within a group-of box with
+        matching_iou_threshold, weight group_of_weight is added to true
+        positives. Consequently, if no detection falls within a group-of box,
+        weight group_of_weight is added to false negatives.
+      per_image_eval_class: The class that contains functions for computing
+        per image metrics.
+
+    Raises:
+      ValueError: if num_groundtruth_classes is smaller than 1.
+    """
     if num_groundtruth_classes < 1:
       raise ValueError('Need at least 1 groundtruth class for evaluation.')
-    self.per_image_eval = per_image_evaluation.PerImageEvaluation(
+    self.per_image_eval = per_image_eval_class(
         num_groundtruth_classes=num_groundtruth_classes,
         matching_iou_threshold=matching_iou_threshold,
         nms_iou_threshold=nms_iou_threshold,
@@ -659,14 +685,16 @@ class ObjectDetectionEvaluation(object):
     self._initialize_detections()

   def _initialize_detections(self):
+    """Initializes internal data structures."""
     self.detection_keys = set()
     self.scores_per_class = [[] for _ in range(self.num_class)]
     self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
     self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
     self.average_precision_per_class = np.empty(self.num_class, dtype=float)
     self.average_precision_per_class.fill(np.nan)
-    self.precisions_per_class = []
-    self.recalls_per_class = []
+    self.precisions_per_class = [np.nan] * self.num_class
+    self.recalls_per_class = [np.nan] * self.num_class
     self.corloc_per_class = np.ones(self.num_class, dtype=float)

   def clear_detections(self):
@@ -867,8 +895,8 @@ class ObjectDetectionEvaluation(object):
       logging.info(scores)
       precision, recall = metrics.compute_precision_recall(
           scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])
-      self.precisions_per_class.append(precision)
-      self.recalls_per_class.append(recall)
+      self.precisions_per_class[class_index] = precision
+      self.recalls_per_class[class_index] = recall
       average_precision = metrics.compute_average_precision(precision, recall)
       self.average_precision_per_class[class_index] = average_precision
...
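With the new per_image_eval_class hook, alternative per-image evaluation logic can be injected into ObjectDetectionEvaluation. A hedged sketch; the subclass here is illustrative:

```python
from object_detection.utils import object_detection_evaluation
from object_detection.utils import per_image_evaluation

class CustomPerImageEvaluation(per_image_evaluation.PerImageEvaluation):
  """Illustrative subclass; override methods to change per-image metrics."""

# Any class with PerImageEvaluation's interface can now be supplied.
evaluator = object_detection_evaluation.ObjectDetectionEvaluation(
    num_groundtruth_classes=20,
    per_image_eval_class=CustomPerImageEvaluation)
```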