Unverified commit 31ae57eb authored by pkulzc, committed by GitHub

Minor fixes for object detection (#5613)

* Internal change.

PiperOrigin-RevId: 213914693

* Add an original_image_spatial_shape tensor to the input dictionary to store the shape of the original input image (see the sketch after this commit message).

PiperOrigin-RevId: 214018767

* Remove "groundtruth_confidences" from decoders use "groundtruth_weights" to indicate label confidence.

This also fixes a bug that only surfaced now: the random crop routines in core/preprocessor.py did not correctly handle the "groundtruth_weights" tensors returned by the decoders.

PiperOrigin-RevId: 214091843

* Update CocoMaskEvaluator to allow for a batch of image info, rather than a single image.

PiperOrigin-RevId: 214295305

* Add an option to summarize gradients.

PiperOrigin-RevId: 214310875

* Adds FasterRCNN inference on CPU

1. Adds a flag use_static_shapes_for_eval to restrict the model to ops that guarantee static shapes.
2. Does not filter overlapping anchors while clipping anchors when use_static_shapes_for_eval is set to True.
3. A...
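A minimal sketch of the new original_image_spatial_shape entry (the field name comes from this commit; the surrounding dictionary contents are illustrative, not lifted from the diff):

import tensorflow as tf
from object_detection.core import standard_fields as fields

# Illustrative input dictionary; only the new key is specific to this commit.
input_dict = {
    fields.InputDataFields.image: tf.zeros([1, 300, 300, 3]),
    # [height, width] of each image before any resizing.
    fields.InputDataFields.original_image_spatial_shape:
        tf.constant([[1024, 768]], dtype=tf.int32),
}
original_shape = input_dict[
    fields.InputDataFields.original_image_spatial_shape]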
parent 0b0c9cfd
......@@ -15,6 +15,7 @@
"""Tests for ssd resnet v1 feature extractors."""
import abc
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
......@@ -64,12 +65,15 @@ class SSDResnetPpnFeatureExtractorTestBase(
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3)
test_image = tf.constant(np.random.rand(4, image_height, image_width, 3))
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertAllClose(preprocessed_image,
test_image - [[123.68, 116.779, 103.939]])
with self.test_session() as sess:
test_image_out, preprocessed_image_out = sess.run(
[test_image, preprocessed_image])
self.assertAllClose(preprocessed_image_out,
test_image_out - [[123.68, 116.779, 103.939]])
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
......
......@@ -134,26 +134,32 @@ class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
(len(self._prediction_heads[BOX_ENCODINGS]),
len(input_shapes)))
for stack_index, input_shape in enumerate(input_shapes):
net = tf.keras.Sequential(name='PreHeadConvolutions_%d' % stack_index)
self._shared_nets.append(net)
net = []
# Add additional conv layers before the class predictor.
features_depth = static_shape.get_depth(input_shape)
depth = max(min(features_depth, self._max_depth), self._min_depth)
tf.logging.info(
'depth of additional conv before box predictor: {}'.format(depth))
if depth > 0 and self._num_layers_before_predictor > 0:
for i in range(self._num_layers_before_predictor):
net.add(keras.Conv2D(depth, [1, 1],
name='Conv2d_%d_1x1_%d' % (i, depth),
padding='SAME',
**self._conv_hyperparams.params()))
net.add(self._conv_hyperparams.build_batch_norm(
net.append(keras.Conv2D(depth, [1, 1],
name='SharedConvolutions_%d/Conv2d_%d_1x1_%d'
% (stack_index, i, depth),
padding='SAME',
**self._conv_hyperparams.params()))
net.append(self._conv_hyperparams.build_batch_norm(
training=(self._is_training and not self._freeze_batchnorm),
name='Conv2d_%d_1x1_%d_norm' % (i, depth)))
net.add(self._conv_hyperparams.build_activation_layer(
name='Conv2d_%d_1x1_%d_activation' % (i, depth),
name='SharedConvolutions_%d/Conv2d_%d_1x1_%d_norm'
% (stack_index, i, depth)))
net.append(self._conv_hyperparams.build_activation_layer(
name='SharedConvolutions_%d/Conv2d_%d_1x1_%d_activation'
% (stack_index, i, depth),
))
# Until certain bugs are fixed in checkpointable lists,
# this net must be appended only once it's been filled with layers.
self._shared_nets.append(net)
self.built = True
def _predict(self, image_features):
......@@ -175,10 +181,11 @@ class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
"""
predictions = collections.defaultdict(list)
for (index, image_feature) in enumerate(image_features):
for (index, net) in enumerate(image_features):
# Apply shared conv layers before the head predictors.
net = self._shared_nets[index](image_feature)
for layer in self._shared_nets[index]:
net = layer(net)
for head_name in self._prediction_heads:
head_obj = self._prediction_heads[head_name][index]
......
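Net effect of the hunks above: the shared pre-head convolutions are now kept in a plain Python list and applied layer by layer in _predict, instead of being wrapped in tf.keras.Sequential. A self-contained sketch of that pattern (layer shapes and names are illustrative):

import tensorflow as tf

# Shared layers kept in a plain list; appended to the parent container only
# after the list is fully built (see the checkpointable-lists comment above).
shared_net = [
    tf.keras.layers.Conv2D(32, [1, 1], padding='SAME',
                           name='SharedConvolutions_0/Conv2d_0_1x1_32'),
    tf.keras.layers.BatchNormalization(
        name='SharedConvolutions_0/Conv2d_0_1x1_32_norm'),
    tf.keras.layers.Activation(
        'relu', name='SharedConvolutions_0/Conv2d_0_1x1_32_activation'),
]

def apply_shared_net(image_feature):
  net = image_feature
  for layer in shared_net:  # mirrors the loop in _predict above
    net = layer(net)
  return net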
......@@ -181,8 +181,8 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
expected_variable_set = set([
'BoxPredictor/PreHeadConvolutions_0/Conv2d_0_1x1_32/bias',
'BoxPredictor/PreHeadConvolutions_0/Conv2d_0_1x1_32/kernel',
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias',
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/kernel',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/bias',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/kernel',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/bias',
......
......@@ -34,16 +34,18 @@ class MaskRCNNClassHead(head.Head):
https://arxiv.org/abs/1703.06870
"""
def __init__(self, is_training, num_classes, fc_hyperparams_fn,
use_dropout, dropout_keep_prob):
def __init__(self,
is_training,
num_class_slots,
fc_hyperparams_fn,
use_dropout,
dropout_keep_prob):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
num_class_slots: number of class slots. Note that num_class_slots may or
may not include an implicit background category.
fc_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for fully connected ops.
use_dropout: Option to use dropout or not. Note that a single dropout
......@@ -54,7 +56,7 @@ class MaskRCNNClassHead(head.Head):
"""
super(MaskRCNNClassHead, self).__init__()
self._is_training = is_training
self._num_classes = num_classes
self._num_class_slots = num_class_slots
self._fc_hyperparams_fn = fc_hyperparams_fn
self._use_dropout = use_dropout
self._dropout_keep_prob = dropout_keep_prob
......@@ -70,7 +72,7 @@ class MaskRCNNClassHead(head.Head):
Returns:
class_predictions_with_background: A float tensor of shape
[batch_size, 1, num_classes + 1] representing the class predictions for
[batch_size, 1, num_class_slots] representing the class predictions for
the proposals.
Raises:
......@@ -91,11 +93,12 @@ class MaskRCNNClassHead(head.Head):
with slim.arg_scope(self._fc_hyperparams_fn()):
class_predictions_with_background = slim.fully_connected(
flattened_roi_pooled_features,
self._num_classes + 1,
self._num_class_slots,
activation_fn=None,
scope='ClassPredictor')
class_predictions_with_background = tf.reshape(
class_predictions_with_background, [-1, 1, self._num_classes + 1])
class_predictions_with_background,
[-1, 1, self._num_class_slots])
return class_predictions_with_background
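Equivalent construction under the new signature; fc_hyperparams_fn below is a hypothetical stand-in for a hyperparams builder, sketched with plain slim:

import tensorflow as tf
from object_detection.predictors.heads import class_head

slim = tf.contrib.slim

def fc_hyperparams_fn():
  """Hypothetical builder returning a slim arg_scope for fully connected ops."""
  with slim.arg_scope([slim.fully_connected],
                      weights_regularizer=slim.l2_regularizer(4e-5)) as sc:
    return sc

head = class_head.MaskRCNNClassHead(
    is_training=False,
    num_class_slots=21,  # was num_classes=20; the +1 background is now explicit
    fc_hyperparams_fn=fc_hyperparams_fn,
    use_dropout=False,
    dropout_keep_prob=1.0)
# head.predict(...) now returns a tensor of shape [batch_size, 1, 21].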
......@@ -104,7 +107,7 @@ class ConvolutionalClassHead(head.Head):
def __init__(self,
is_training,
num_classes,
num_class_slots,
use_dropout,
dropout_keep_prob,
kernel_size,
......@@ -115,7 +118,8 @@ class ConvolutionalClassHead(head.Head):
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: Number of classes.
num_class_slots: number of class slots. Note that num_class_slots may or
may not include an implicit background category.
use_dropout: Option to use dropout or not. Note that a single dropout
op is applied here prior to both box and class predictions, which stands
in contrast to the ConvolutionalBoxPredictor below.
......@@ -137,7 +141,7 @@ class ConvolutionalClassHead(head.Head):
"""
super(ConvolutionalClassHead, self).__init__()
self._is_training = is_training
self._num_classes = num_classes
self._num_class_slots = num_class_slots
self._use_dropout = use_dropout
self._dropout_keep_prob = dropout_keep_prob
self._kernel_size = kernel_size
......@@ -156,12 +160,10 @@ class ConvolutionalClassHead(head.Head):
Returns:
class_predictions_with_background: A float tensors of shape
[batch_size, num_anchors, num_classes + 1] representing the class
[batch_size, num_anchors, num_class_slots] representing the class
predictions for the proposals.
"""
net = features
# Add a slot for the background class.
num_class_slots = self._num_classes + 1
if self._use_dropout:
net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
if self._use_depthwise:
......@@ -171,7 +173,7 @@ class ConvolutionalClassHead(head.Head):
rate=1, scope='ClassPredictor_depthwise')
class_predictions_with_background = slim.conv2d(
class_predictions_with_background,
num_predictions_per_location * num_class_slots, [1, 1],
num_predictions_per_location * self._num_class_slots, [1, 1],
activation_fn=None,
normalizer_fn=None,
normalizer_params=None,
......@@ -179,7 +181,7 @@ class ConvolutionalClassHead(head.Head):
else:
class_predictions_with_background = slim.conv2d(
net,
num_predictions_per_location * num_class_slots,
num_predictions_per_location * self._num_class_slots,
[self._kernel_size, self._kernel_size],
activation_fn=None,
normalizer_fn=None,
......@@ -194,7 +196,8 @@ class ConvolutionalClassHead(head.Head):
if batch_size is None:
batch_size = tf.shape(features)[0]
class_predictions_with_background = tf.reshape(
class_predictions_with_background, [batch_size, -1, num_class_slots])
class_predictions_with_background,
[batch_size, -1, self._num_class_slots])
return class_predictions_with_background
......@@ -208,7 +211,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
"""
def __init__(self,
num_classes,
num_class_slots,
kernel_size=3,
class_prediction_bias_init=0.0,
use_dropout=False,
......@@ -218,10 +221,8 @@ class WeightSharedConvolutionalClassHead(head.Head):
"""Constructor.
Args:
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
num_class_slots: number of class slots. Note that num_class_slots may or
may not include an implicit background category.
kernel_size: Size of final convolution kernel.
class_prediction_bias_init: constant value to initialize bias of the last
conv2d layer before class prediction.
......@@ -233,7 +234,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
as inputs and returns tensors).
"""
super(WeightSharedConvolutionalClassHead, self).__init__()
self._num_classes = num_classes
self._num_class_slots = num_class_slots
self._kernel_size = kernel_size
self._class_prediction_bias_init = class_prediction_bias_init
self._use_dropout = use_dropout
......@@ -252,12 +253,10 @@ class WeightSharedConvolutionalClassHead(head.Head):
Returns:
class_predictions_with_background: A tensor of shape
[batch_size, num_anchors, num_classes + 1] representing the class
[batch_size, num_anchors, num_class_slots] representing the class
predictions for the proposals.
"""
class_predictions_net = features
num_class_slots = self._num_classes + 1
# Add a slot for the background class.
if self._use_dropout:
class_predictions_net = slim.dropout(
class_predictions_net, keep_prob=self._dropout_keep_prob)
......@@ -267,7 +266,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
conv_op = slim.conv2d
class_predictions_with_background = conv_op(
class_predictions_net,
num_predictions_per_location * num_class_slots,
num_predictions_per_location * self._num_class_slots,
[self._kernel_size, self._kernel_size],
activation_fn=None, stride=1, padding='SAME',
normalizer_fn=None,
......@@ -280,5 +279,6 @@ class WeightSharedConvolutionalClassHead(head.Head):
class_predictions_with_background = self._score_converter_fn(
class_predictions_with_background)
class_predictions_with_background = tf.reshape(
class_predictions_with_background, [batch_size, -1, num_class_slots])
class_predictions_with_background,
[batch_size, -1, self._num_class_slots])
return class_predictions_with_background
......@@ -46,7 +46,7 @@ class MaskRCNNClassHeadTest(test_case.TestCase):
def test_prediction_size(self):
class_prediction_head = class_head.MaskRCNNClassHead(
is_training=False,
num_classes=20,
num_class_slots=20,
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=True,
dropout_keep_prob=0.5)
......@@ -54,7 +54,7 @@ class MaskRCNNClassHeadTest(test_case.TestCase):
[64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
prediction = class_prediction_head.predict(
features=roi_pooled_features, num_predictions_per_location=1)
self.assertAllEqual([64, 1, 21], prediction.get_shape().as_list())
self.assertAllEqual([64, 1, 20], prediction.get_shape().as_list())
class ConvolutionalClassPredictorTest(test_case.TestCase):
......@@ -80,7 +80,7 @@ class ConvolutionalClassPredictorTest(test_case.TestCase):
def test_prediction_size(self):
class_prediction_head = class_head.ConvolutionalClassHead(
is_training=True,
num_classes=20,
num_class_slots=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3)
......@@ -89,7 +89,7 @@ class ConvolutionalClassPredictorTest(test_case.TestCase):
class_predictions = class_prediction_head.predict(
features=image_feature,
num_predictions_per_location=1)
self.assertAllEqual([64, 323, 21],
self.assertAllEqual([64, 323, 20],
class_predictions.get_shape().as_list())
......@@ -115,13 +115,13 @@ class WeightSharedConvolutionalClassPredictorTest(test_case.TestCase):
def test_prediction_size(self):
class_prediction_head = (
class_head.WeightSharedConvolutionalClassHead(num_classes=20))
class_head.WeightSharedConvolutionalClassHead(num_class_slots=20))
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
class_predictions = class_prediction_head.predict(
features=image_feature,
num_predictions_per_location=1)
self.assertAllEqual([64, 323, 21], class_predictions.get_shape().as_list())
self.assertAllEqual([64, 323, 20], class_predictions.get_shape().as_list())
if __name__ == '__main__':
......
......@@ -91,7 +91,7 @@ class ConvolutionalBoxHead(head.KerasHead):
tf.keras.layers.Conv2D(
num_predictions_per_location * self._box_code_size, [1, 1],
name='BoxEncodingPredictor',
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
else:
self._box_encoder_layers.append(
tf.keras.layers.Conv2D(
......@@ -99,7 +99,7 @@ class ConvolutionalBoxHead(head.KerasHead):
[self._kernel_size, self._kernel_size],
padding='SAME',
name='BoxEncodingPredictor',
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
def _predict(self, features):
"""Predicts boxes.
......
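The hunks above swap params(activation=None) for params(use_bias=True), which suggests the hyperparams object applies per-call keyword overrides on top of the configured Conv2D kwargs. A hedged sketch of that override pattern (the params helper below is a stand-in, not the library implementation):

import tensorflow as tf

def params(use_bias=False):
  """Stand-in for conv_hyperparams.params(): shared Conv2D kwargs plus
  per-call overrides, as assumed from the calls above."""
  return dict(
      kernel_regularizer=tf.keras.regularizers.l2(4e-5),
      use_bias=use_bias)

# Prediction layers keep a bias: no batch norm follows them, so the bias
# is not redundant here.
box_encoder = tf.keras.layers.Conv2D(
    4, [1, 1], padding='SAME', name='BoxEncodingPredictor',
    **params(use_bias=True))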
......@@ -29,7 +29,7 @@ class ConvolutionalClassHead(head.KerasHead):
def __init__(self,
is_training,
num_classes,
num_class_slots,
use_dropout,
dropout_keep_prob,
kernel_size,
......@@ -43,7 +43,8 @@ class ConvolutionalClassHead(head.KerasHead):
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: Number of classes.
num_class_slots: number of class slots. Note that num_class_slots may or
may not include an implicit background category.
use_dropout: Option to use dropout or not. Note that a single dropout
op is applied here prior to both box and class predictions, which stands
in contrast to the ConvolutionalBoxPredictor below.
......@@ -73,13 +74,12 @@ class ConvolutionalClassHead(head.KerasHead):
"""
super(ConvolutionalClassHead, self).__init__(name=name)
self._is_training = is_training
self._num_classes = num_classes
self._use_dropout = use_dropout
self._dropout_keep_prob = dropout_keep_prob
self._kernel_size = kernel_size
self._class_prediction_bias_init = class_prediction_bias_init
self._use_depthwise = use_depthwise
self._num_class_slots = self._num_classes + 1
self._num_class_slots = num_class_slots
self._class_predictor_layers = []
......@@ -110,7 +110,7 @@ class ConvolutionalClassHead(head.KerasHead):
tf.keras.layers.Conv2D(
num_predictions_per_location * self._num_class_slots, [1, 1],
name='ClassPredictor',
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
else:
self._class_predictor_layers.append(
tf.keras.layers.Conv2D(
......@@ -120,7 +120,7 @@ class ConvolutionalClassHead(head.KerasHead):
name='ClassPredictor',
bias_initializer=tf.constant_initializer(
self._class_prediction_bias_init),
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
def _predict(self, features):
"""Predicts boxes.
......@@ -131,7 +131,7 @@ class ConvolutionalClassHead(head.KerasHead):
Returns:
class_predictions_with_background: A float tensor of shape
[batch_size, num_anchors, num_classes + 1] representing the class
[batch_size, num_anchors, num_class_slots] representing the class
predictions for the proposals.
"""
# Add a slot for the background class.
......
......@@ -45,7 +45,7 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
conv_hyperparams = self._build_conv_hyperparams()
class_prediction_head = keras_class_head.ConvolutionalClassHead(
is_training=True,
num_classes=20,
num_class_slots=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
......@@ -56,7 +56,7 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
class_predictions = class_prediction_head(image_feature,)
self.assertAllEqual([64, 323, 21],
self.assertAllEqual([64, 323, 20],
class_predictions.get_shape().as_list())
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
......
......@@ -124,7 +124,7 @@ class ConvolutionalMaskHead(head.KerasHead):
tf.keras.layers.Conv2D(
num_predictions_per_location * num_mask_channels, [1, 1],
name='MaskPredictor',
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
else:
self._mask_predictor_layers.append(
tf.keras.layers.Conv2D(
......@@ -132,7 +132,7 @@ class ConvolutionalMaskHead(head.KerasHead):
[self._kernel_size, self._kernel_size],
padding='SAME',
name='MaskPredictor',
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
def _predict(self, features):
"""Predicts boxes.
......
......@@ -23,6 +23,7 @@ import math
import tensorflow as tf
from object_detection.predictors.heads import head
from object_detection.utils import ops
slim = tf.contrib.slim
......@@ -41,7 +42,8 @@ class MaskRCNNMaskHead(head.Head):
mask_width=14,
mask_prediction_num_conv_layers=2,
mask_prediction_conv_depth=256,
masks_are_class_agnostic=False):
masks_are_class_agnostic=False,
convolve_then_upsample=False):
"""Constructor.
Args:
......@@ -62,6 +64,10 @@ class MaskRCNNMaskHead(head.Head):
image features.
masks_are_class_agnostic: Boolean determining if the mask-head is
class-agnostic or not.
convolve_then_upsample: Whether to apply convolutions on mask features
before upsampling using nearest neighbor resizing. Otherwise, mask
features are resized to [`mask_height`, `mask_width`] using bilinear
resizing before applying convolutions.
Raises:
ValueError: conv_hyperparams_fn is None.
......@@ -74,6 +80,7 @@ class MaskRCNNMaskHead(head.Head):
self._mask_prediction_num_conv_layers = mask_prediction_num_conv_layers
self._mask_prediction_conv_depth = mask_prediction_conv_depth
self._masks_are_class_agnostic = masks_are_class_agnostic
self._convolve_then_upsample = convolve_then_upsample
if conv_hyperparams_fn is None:
raise ValueError('conv_hyperparams_fn is None.')
......@@ -135,17 +142,30 @@ class MaskRCNNMaskHead(head.Head):
num_conv_channels = self._get_mask_predictor_conv_depth(
num_feature_channels, self._num_classes)
with slim.arg_scope(self._conv_hyperparams_fn()):
upsampled_features = tf.image.resize_bilinear(
features, [self._mask_height, self._mask_width],
align_corners=True)
if not self._convolve_then_upsample:
features = tf.image.resize_bilinear(
features, [self._mask_height, self._mask_width],
align_corners=True)
for _ in range(self._mask_prediction_num_conv_layers - 1):
upsampled_features = slim.conv2d(
upsampled_features,
features = slim.conv2d(
features,
num_outputs=num_conv_channels,
kernel_size=[3, 3])
if self._convolve_then_upsample:
# Replace Transposed Convolution with a Nearest Neighbor upsampling step
# followed by 3x3 convolution.
height_scale = self._mask_height / features.shape[1].value
width_scale = self._mask_width / features.shape[2].value
features = ops.nearest_neighbor_upsampling(
features, height_scale=height_scale, width_scale=width_scale)
features = slim.conv2d(
features,
num_outputs=num_conv_channels,
kernel_size=[3, 3])
num_masks = 1 if self._masks_are_class_agnostic else self._num_classes
mask_predictions = slim.conv2d(
upsampled_features,
features,
num_outputs=num_masks,
activation_fn=None,
normalizer_fn=None,
......
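When convolve_then_upsample is set, the bilinear resize is replaced by ops.nearest_neighbor_upsampling followed by 3x3 convolutions. A simplified stand-in for that op, assuming an integer scale factor (the library version may differ in details):

import tensorflow as tf

def nearest_neighbor_upsample(features, scale):
  """Repeats each spatial cell `scale` times along height and width."""
  shape = tf.shape(features)
  batch, height, width = shape[0], shape[1], shape[2]
  channels = features.shape[3].value
  out = tf.reshape(features, [batch, height, 1, width, 1, channels])
  out = tf.tile(out, [1, 1, scale, 1, scale, 1])
  return tf.reshape(out, [batch, height * scale, width * scale, channels])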
......@@ -58,6 +58,22 @@ class MaskRCNNMaskHeadTest(test_case.TestCase):
features=roi_pooled_features, num_predictions_per_location=1)
self.assertAllEqual([64, 1, 20, 14, 14], prediction.get_shape().as_list())
def test_prediction_size_with_convolve_then_upsample(self):
mask_prediction_head = mask_head.MaskRCNNMaskHead(
num_classes=20,
conv_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
mask_height=28,
mask_width=28,
mask_prediction_num_conv_layers=2,
mask_prediction_conv_depth=256,
masks_are_class_agnostic=True,
convolve_then_upsample=True)
roi_pooled_features = tf.random_uniform(
[64, 14, 14, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
prediction = mask_prediction_head.predict(
features=roi_pooled_features, num_predictions_per_location=1)
self.assertAllEqual([64, 1, 1, 28, 28], prediction.get_shape().as_list())
class ConvolutionalMaskPredictorTest(test_case.TestCase):
......
......@@ -138,6 +138,7 @@ message WeightSharedConvolutionalBoxPredictor {
// TODO(alirezafathi): Refactor the proto file to be able to configure mask rcnn
// head easily.
// Next id: 15
message MaskRCNNBoxPredictor {
// Hyperparameters for fully connected ops used in the box predictor.
optional Hyperparams fc_hyperparams = 1;
......@@ -178,6 +179,12 @@ message MaskRCNNBoxPredictor {
// Whether to use one box for all classes rather than a different box for each
// class.
optional bool share_box_across_classes = 13 [default = false];
// Whether to apply convolutions on mask features before upsampling using
// nearest neighbor resizing.
// By default, mask features are resized to [`mask_height`, `mask_width`]
// before applying convolutions and predicting masks.
optional bool convolve_then_upsample_masks = 14 [default = false];
}
message RfcnBoxPredictor {
......
......@@ -164,6 +164,10 @@ message FasterRcnn {
// Whether the masks present in groundtruth should be resized in the model to
// match the image size.
optional bool resize_masks = 36 [default = true];
// If true, uses implementations of ops with static shape guarantees when
// running evaluation (specifically, when is_training is false).
optional bool use_static_shapes_for_eval = 37 [default = false];
}
......
......@@ -155,6 +155,9 @@ message RandomCropImage {
// value, it is removed from the new image.
optional float overlap_thresh = 6 [default=0.3];
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 8 [default=true];
// Probability of keeping the original image.
optional float random_coef = 7 [default=0.0];
}
......@@ -194,6 +197,9 @@ message RandomCropPadImage {
// value, it is removed from the new image.
optional float overlap_thresh = 6 [default=0.3];
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 11 [default=true];
// Probability of keeping the original image during the crop operation.
optional float random_coef = 7 [default=0.0];
......@@ -217,6 +223,9 @@ message RandomCropToAspectRatio {
// ratio between a cropped bounding box and the original is less than this
// value, it is removed from the new image.
optional float overlap_thresh = 2 [default=0.3];
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 3 [default=true];
}
// Randomly adds black square patches to an image.
......@@ -285,6 +294,9 @@ message SSDRandomCropOperation {
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 8 [default=true];
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
......@@ -315,6 +327,9 @@ message SSDRandomCropPadOperation {
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 13 [default=true];
// Probability a crop operation is skipped.
optional float random_coef = 7;
......@@ -353,6 +368,9 @@ message SSDRandomCropFixedAspectRatioOperation {
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 8 [default=true];
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
......@@ -387,6 +405,9 @@ message SSDRandomCropPadFixedAspectRatioOperation {
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 8 [default=true];
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
......
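Every crop-style preprocessor message above gains the same clip_boxes knob. A hedged sketch of setting it from Python through the generated protos (message and field names as in the hunks above; the values are illustrative):

from google.protobuf import text_format
from object_detection.protos import preprocessor_pb2

step = preprocessor_pb2.PreprocessingStep()
text_format.Merge("""
  random_crop_image {
    overlap_thresh: 0.3
    clip_boxes: false  # keep boxes that extend past the crop un-clipped
    random_coef: 0.0
  }
""", step)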
......@@ -12,7 +12,7 @@ import "object_detection/protos/post_processing.proto";
import "object_detection/protos/region_similarity_calculator.proto";
// Configuration for Single Shot Detection (SSD) models.
// Next id: 21
// Next id: 22
message Ssd {
// Number of classes to predict.
......@@ -92,11 +92,17 @@ message Ssd {
// Minimum number of effective negative samples.
// Only applies if use_expected_classification_loss_under_sampling is true.
optional float minimum_negative_sampling = 19 [default=0];
optional float min_num_negative_samples = 19 [default=0];
// Desired number of effective negative samples per positive sample.
// Only applies if use_expected_classification_loss_under_sampling is true.
optional float desired_negative_sampling_ratio = 20 [default=3];
// Whether to add an implicit background class to one-hot encodings of
// groundtruth labels. Set to false if using groundtruth labels with an
// explicit background class, using multiclass scores, or if training a single
// class model.
optional bool add_background_class = 21 [default = true];
}
......
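What add_background_class toggles, sketched in plain TensorFlow (the padding below illustrates the documented behavior; it is not code from this diff):

import tensorflow as tf

classes = tf.constant([2, 0, 1])           # labels in {0, .., K-1}, K = 3
one_hot = tf.one_hot(classes, depth=3)     # shape [3, 3]

# add_background_class: true  -> prepend an implicit background slot.
with_background = tf.pad(one_hot, [[0, 0], [1, 0]])   # shape [3, 4]
# add_background_class: false -> use labels as-is (explicit background
# class, multiclass scores, or a single-class model).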
......@@ -6,7 +6,7 @@ import "object_detection/protos/optimizer.proto";
import "object_detection/protos/preprocessor.proto";
// Message for configuring DetectionModel training jobs (train.py).
// Next id: 27
// Next id: 28
message TrainConfig {
// Effective batch size to use for training.
// For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be
......@@ -115,4 +115,7 @@ message TrainConfig {
// Whether to use bfloat16 for training.
optional bool use_bfloat16 = 26 [default=false];
// Whether to summarize gradients.
optional bool summarize_gradients = 27 [default=false];
}
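A hedged sketch of what the new flag plausibly wires through: slim's create_train_op accepts a summarize_gradients argument that emits one summary per gradient. The loss and optimizer below are assumed to be built elsewhere:

import tensorflow as tf

slim = tf.contrib.slim

train_op = slim.learning.create_train_op(
    total_loss,                # assumed: scalar loss tensor
    optimizer,                 # assumed: a configured tf.train optimizer
    summarize_gradients=True)  # adds per-gradient summaries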
# Quantized trained SSD with Mobilenet v2 on Open Images v4.
# Non-face boxes are dropped during training and non-face groundtruth boxes are
# ignored when evaluating.
#
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
num_classes: 1
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
feature_extractor {
type: "ssd_mobilenet_v2"
depth_multiplier: 1.0
min_depth: 16
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 4.0e-05
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.03
}
}
activation: RELU_6
batch_norm {
decay: 0.9997
center: true
scale: true
epsilon: 0.001
train: true
}
}
pad_to_multiple: 32
use_explicit_padding: true
}
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 4.0e-05
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.03
}
}
activation: RELU_6
batch_norm {
decay: 0.9997
center: true
scale: true
epsilon: 0.001
train: true
}
}
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
kernel_size: 3
box_code_size: 4
apply_sigmoid_to_scores: false
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
height_stride: 16
height_stride: 32
height_stride: 64
height_stride: 128
height_stride: 256
height_stride: 512
width_stride: 16
width_stride: 32
width_stride: 64
width_stride: 128
width_stride: 256
width_stride: 512
}
}
post_processing {
batch_non_max_suppression {
score_threshold: 1.0e-08
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
normalize_loss_by_num_matches: true
loss {
localization_loss {
weighted_smooth_l1 {
}
}
classification_loss {
weighted_sigmoid {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 10
}
classification_weight: 1.0
localization_weight: 1.0
}
}
}
train_config {
batch_size: 32
data_augmentation_options {
random_horizontal_flip {
keypoint_flip_permutation: 1
keypoint_flip_permutation: 0
keypoint_flip_permutation: 2
keypoint_flip_permutation: 3
keypoint_flip_permutation: 5
keypoint_flip_permutation: 4
}
}
data_augmentation_options {
ssd_random_crop_fixed_aspect_ratio {
}
}
optimizer {
rms_prop_optimizer {
learning_rate {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
fine_tune_checkpoint: ""
}
train_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/face_label_map.pbtxt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/face_train.record-?????-of-00100"
}
}
eval_config {
metrics_set: "coco_detection_metrics"
use_moving_averages: true
}
eval_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/face_label_map.pbtxt"
shuffle: false
num_readers: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/face_val.record-?????-of-00010"
}
}
graph_rewriter {
quantization {
delay: 500000
weight_bits: 8
activation_bits: 8
}
}
# Quantized trained SSD with Mobilenet v2 on MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
fixed_shape_resizer {
height: 300
width: 300
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 1
box_code_size: 4
apply_sigmoid_to_scores: false
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'ssd_mobilenet_v2'
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 3
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 24
optimizer {
rms_prop_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
fine_tune_checkpoint_type: "detection"
# Note: The below line limits the training process to 200K steps, which we
# empirically found to be sufficient to train the pets dataset. This
# effectively bypasses the learning rate schedule (the learning rate will
# never decay). Remove the below line to train indefinitely.
num_steps: 200000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}
eval_config: {
num_examples: 8000
# Note: The below line limits the evaluation process to 10 evaluations.
# Remove the below line to evaluate indefinitely.
max_evals: 10
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false
num_readers: 1
}
graph_rewriter {
quantization {
delay: 48000
weight_bits: 8
activation_bits: 8
}
}
\ No newline at end of file
......@@ -76,12 +76,14 @@ def get_spatial_image_size(image_resizer_config):
raise ValueError("Unknown image resizer type.")
def get_configs_from_pipeline_file(pipeline_config_path):
def get_configs_from_pipeline_file(pipeline_config_path, config_override=None):
"""Reads config from a file containing pipeline_pb2.TrainEvalPipelineConfig.
Args:
pipeline_config_path: Path to pipeline_pb2.TrainEvalPipelineConfig text
proto.
config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
override pipeline_config_path.
Returns:
Dictionary of configuration objects. Keys are `model`, `train_config`,
......@@ -92,6 +94,8 @@ def get_configs_from_pipeline_file(pipeline_config_path):
with tf.gfile.GFile(pipeline_config_path, "r") as f:
proto_str = f.read()
text_format.Merge(proto_str, pipeline_config)
if config_override:
text_format.Merge(config_override, pipeline_config)
return create_configs_from_pipeline_proto(pipeline_config)
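Usage sketch for the new override hook (the file path and override string are illustrative):

from object_detection.utils import config_util

# Override fields on top of the file-based config without editing the file.
configs = config_util.get_configs_from_pipeline_file(
    'pipeline.config',
    config_override='train_config { batch_size: 8 }')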
......@@ -430,7 +434,7 @@ def merge_external_params_with_configs(configs, hparams=None, kwargs_dict=None):
final learning rates.
In this case key can be one of the following formats:
1. legacy update: single string that indicates the attribute to be
updated. E.g. 'lable_map_path', 'eval_input_path', 'shuffle'.
updated. E.g. 'label_map_path', 'eval_input_path', 'shuffle'.
Note that when updating fields (e.g. eval_input_path, eval_shuffle) in
eval_input_configs, the override will only be applied when
eval_input_configs has exactly 1 element.
......
......@@ -633,11 +633,37 @@ class ObjectDetectionEvaluation(object):
nms_max_output_boxes=10000,
use_weighted_mean_ap=False,
label_id_offset=0,
group_of_weight=0.0):
group_of_weight=0.0,
per_image_eval_class=per_image_evaluation.PerImageEvaluation):
"""Constructor.
Args:
num_groundtruth_classes: Number of ground-truth classes.
matching_iou_threshold: IOU threshold used for matching detected boxes
to ground-truth boxes.
nms_iou_threshold: IOU threshold used for non-maximum suppression.
nms_max_output_boxes: Maximum number of boxes returned by non-maximum
suppression.
use_weighted_mean_ap: (optional) boolean which determines if the mean
average precision is computed directly from the scores and tp_fp_labels
of all classes.
label_id_offset: The label id offset.
group_of_weight: Weight of group-of boxes. If set to 0, detections of the
correct class within a group-of box are ignored. If the weight is > 0 and
at least one detection falls within a group-of box at
matching_iou_threshold, a weight of group_of_weight is added to the true
positives; consequently, if no detection falls within a group-of box,
the same weight is added to the false negatives.
per_image_eval_class: The class that contains functions for computing
per image metrics.
Raises:
ValueError: if num_groundtruth_classes is smaller than 1.
"""
if num_groundtruth_classes < 1:
raise ValueError('Need at least 1 groundtruth class for evaluation.')
self.per_image_eval = per_image_evaluation.PerImageEvaluation(
self.per_image_eval = per_image_eval_class(
num_groundtruth_classes=num_groundtruth_classes,
matching_iou_threshold=matching_iou_threshold,
nms_iou_threshold=nms_iou_threshold,
......@@ -659,14 +685,16 @@ class ObjectDetectionEvaluation(object):
self._initialize_detections()
def _initialize_detections(self):
"""Initializes internal data structures."""
self.detection_keys = set()
self.scores_per_class = [[] for _ in range(self.num_class)]
self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
self.average_precision_per_class = np.empty(self.num_class, dtype=float)
self.average_precision_per_class.fill(np.nan)
self.precisions_per_class = []
self.recalls_per_class = []
self.precisions_per_class = [np.nan] * self.num_class
self.recalls_per_class = [np.nan] * self.num_class
self.corloc_per_class = np.ones(self.num_class, dtype=float)
def clear_detections(self):
......@@ -867,8 +895,8 @@ class ObjectDetectionEvaluation(object):
logging.info(scores)
precision, recall = metrics.compute_precision_recall(
scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])
self.precisions_per_class.append(precision)
self.recalls_per_class.append(recall)
self.precisions_per_class[class_index] = precision
self.recalls_per_class[class_index] = recall
average_precision = metrics.compute_average_precision(precision, recall)
self.average_precision_per_class[class_index] = average_precision
......
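The new per_image_eval_class hook makes the per-image evaluator injectable; a hedged usage sketch (the subclass is a hypothetical placeholder):

from object_detection.utils import object_detection_evaluation
from object_detection.utils import per_image_evaluation

class MyPerImageEvaluation(per_image_evaluation.PerImageEvaluation):
  """Hypothetical subclass; override per-image metric computation here."""

evaluator = object_detection_evaluation.ObjectDetectionEvaluation(
    num_groundtruth_classes=3,
    per_image_eval_class=MyPerImageEvaluation)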