Commit e00e0e13 authored by dreamdragon

Merge remote-tracking branch 'upstream/master'

parents b915db4e 402b561b
@@ -85,41 +85,44 @@ class SSDMobileNetV2KerasFeatureExtractor(
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
feature_map_layout = {
self._feature_map_layout = {
'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_depthwise': self._use_depthwise,
'use_explicit_padding': self._use_explicit_padding,
}
with tf.name_scope('MobilenetV2'):
full_mobilenet_v2 = mobilenet_v2.mobilenet_v2(
batchnorm_training=(is_training and not freeze_batchnorm),
conv_hyperparams=(conv_hyperparams
if self._override_base_feature_extractor_hyperparams
else None),
weights=None,
use_explicit_padding=use_explicit_padding,
alpha=self._depth_multiplier,
min_depth=self._min_depth,
include_top=False)
conv2d_11_pointwise = full_mobilenet_v2.get_layer(
name='block_13_expand_relu').output
conv2d_13_pointwise = full_mobilenet_v2.get_layer(name='out_relu').output
self.mobilenet_v2 = tf.keras.Model(
inputs=full_mobilenet_v2.inputs,
outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
self.feature_map_generator = (
feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
is_training=is_training,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
name='FeatureMaps'))
self.mobilenet_v2 = None
self.feature_map_generator = None
def build(self, input_shape):
full_mobilenet_v2 = mobilenet_v2.mobilenet_v2(
batchnorm_training=(self._is_training and not self._freeze_batchnorm),
conv_hyperparams=(self._conv_hyperparams
if self._override_base_feature_extractor_hyperparams
else None),
weights=None,
use_explicit_padding=self._use_explicit_padding,
alpha=self._depth_multiplier,
min_depth=self._min_depth,
include_top=False)
conv2d_11_pointwise = full_mobilenet_v2.get_layer(
name='block_13_expand_relu').output
conv2d_13_pointwise = full_mobilenet_v2.get_layer(name='out_relu').output
self.mobilenet_v2 = tf.keras.Model(
inputs=full_mobilenet_v2.inputs,
outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
self.feature_map_generator = (
feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=self._feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
is_training=self._is_training,
conv_hyperparams=self._conv_hyperparams,
freeze_batchnorm=self._freeze_batchnorm,
name='FeatureMaps'))
self.built = True
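The hunk above moves layer construction out of __init__ and into build(), Keras's lazy-build pattern: sublayers are created only once an input shape is known, and self.built is set to mark completion. A minimal sketch of the same pattern, with hypothetical names, assuming tf.keras:

```python
import tensorflow as tf

class LazyBlock(tf.keras.layers.Layer):
  """Creates its sublayers in build(), mirroring the refactor above."""

  def build(self, input_shape):
    # Constructed on first call, once the input shape is known.
    self.conv = tf.keras.layers.Conv2D(8, 1, padding='same', name='proj')
    self.built = True

  def call(self, inputs):
    return self.conv(inputs)
```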
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for PNASNet features.
Based on PNASNet ImageNet model: https://arxiv.org/abs/1712.00559
"""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from nets.nasnet import pnasnet
slim = tf.contrib.slim
def pnasnet_large_arg_scope_for_detection(is_batch_norm_training=False):
"""Defines the default arg scope for the PNASNet Large for object detection.
This provides a small edit to switch batch norm training on and off.
Args:
is_batch_norm_training: Boolean indicating whether to train with batch norm.
Default is False.
Returns:
An `arg_scope` to use for the PNASNet Large Model.
"""
imagenet_scope = pnasnet.pnasnet_large_arg_scope()
with slim.arg_scope(imagenet_scope):
with slim.arg_scope([slim.batch_norm],
is_training=is_batch_norm_training) as sc:
return sc
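A hedged usage sketch of this arg_scope (the `images` tensor is assumed): building PNASNet under it makes every slim.batch_norm honor the requested training mode, as extract_features does further down.

```python
# Illustrative only; `images` is an assumed [batch, height, width, 3] tensor.
with slim.arg_scope(
    pnasnet_large_arg_scope_for_detection(is_batch_norm_training=True)):
  _, end_points = pnasnet.build_pnasnet_large(
      images, num_classes=None, is_training=True, final_endpoint='Cell_11')
```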
class SSDPNASNetFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using PNASNet features."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""PNASNet Feature Extractor for SSD Models.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_depthwise: Whether to use depthwise convolutions.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDPNASNetFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams_fn=conv_hyperparams_fn,
reuse_weights=reuse_weights,
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
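As a quick numeric check of this rescaling, 0 maps to -1.0, 127.5 to 0.0, and 255 to 1.0:

```python
import numpy as np

# Sanity check of the [-1, 1] mapping used in preprocess() (illustrative).
pixels = np.array([0.0, 127.5, 255.0])
print((2.0 / 255.0) * pixels - 1.0)  # [-1.  0.  1.]
```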
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
feature_map_layout = {
'from_layer': ['Cell_7', 'Cell_11', '', '', '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
}
with slim.arg_scope(
pnasnet_large_arg_scope_for_detection(
is_batch_norm_training=self._is_training)):
with slim.arg_scope([slim.conv2d, slim.batch_norm, slim.separable_conv2d],
reuse=self._reuse_weights):
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams else
context_manager.IdentityContextManager()):
_, image_features = pnasnet.build_pnasnet_large(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
num_classes=None,
is_training=self._is_training,
final_endpoint='Cell_11')
with tf.variable_scope('SSD_feature_maps', reuse=self._reuse_weights):
with slim.arg_scope(self._conv_hyperparams_fn()):
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Note that this overrides the default implementation in
ssd_meta_arch.SSDFeatureExtractor which does not work for PNASNet
checkpoints.
Args:
feature_extractor_scope: A scope name for the first stage feature
extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in tf.global_variables():
if variable.op.name.startswith(feature_extractor_scope):
var_name = variable.op.name.replace(feature_extractor_scope + '/', '')
var_name += '/ExponentialMovingAverage'
variables_to_restore[var_name] = variable
return variables_to_restore
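A small sketch of the name rewrite this loop performs, using a hypothetical variable name: the feature extractor scope is stripped and the EMA suffix appended, so exponential-moving-average values from a PNASNet classification checkpoint restore into the live graph variables.

```python
# Hypothetical names, for illustration only.
scope = 'FeatureExtractor'
op_name = 'FeatureExtractor/cell_stem_0/conv/weights'
ckpt_name = op_name.replace(scope + '/', '') + '/ExponentialMovingAverage'
print(ckpt_name)  # cell_stem_0/conv/weights/ExponentialMovingAverage
```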
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for ssd_pnas_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_pnasnet_feature_extractor
slim = tf.contrib.slim
class SsdPnasNetFeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, use_explicit_padding=False):
"""Constructs a new feature extractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
is_training: whether the network is in training mode.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
return ssd_pnasnet_feature_extractor.SSDPNASNetFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 8, 8, 2160), (2, 4, 4, 4320),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 19, 19, 2160), (2, 10, 10, 4320),
(2, 5, 5, 512), (2, 3, 3, 256),
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(2, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
if __name__ == '__main__':
tf.test.main()
@@ -113,6 +113,8 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
VGG style channel mean subtraction as described here:
https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
Note that if the number of channels is not equal to 3, the mean subtraction
will be skipped and the original resized_inputs will be returned.
Args:
resized_inputs: a [batch, height, width, channels] float tensor
@@ -122,8 +124,11 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
if resized_inputs.shape.as_list()[3] == 3:
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
else:
return resized_inputs
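For illustration, the per-channel shift on a constant all-255 image (a numpy sketch of the subtraction above):

```python
import numpy as np

# Broadcasting [[channel_means]] subtracts per-channel means from NHWC input.
image = np.full((1, 2, 2, 3), 255.0)
channel_means = [123.68, 116.779, 103.939]
print((image - [[channel_means]])[0, 0, 0])  # [131.32  138.221 151.061]
```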
def _filter_features(self, image_features):
# TODO(rathodv): Change resnet endpoint to strip scope prefixes instead
......
@@ -82,12 +82,15 @@ class SSDResnetFPNFeatureExtractorTestBase(
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3)
test_image = tf.constant(np.random.rand(4, image_height, image_width, 3))
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertAllClose(preprocessed_image,
test_image - [[123.68, 116.779, 103.939]])
with self.test_session() as sess:
test_image_out, preprocessed_image_out = sess.run(
[test_image, preprocessed_image])
self.assertAllClose(preprocessed_image_out,
test_image_out - [[123.68, 116.779, 103.939]])
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
@@ -103,5 +106,3 @@ class SSDResnetFPNFeatureExtractorTestBase(
self.assertTrue(
variable.name.startswith(self._resnet_scope_name())
or variable.name.startswith(self._fpn_scope_name()))
@@ -98,6 +98,8 @@ class _SSDResnetPpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
VGG style channel mean subtraction as described here:
https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
Note that if the number of channels is not equal to 3, the mean subtraction
will be skipped and the original resized_inputs will be returned.
Args:
resized_inputs: a [batch, height, width, channels] float tensor
@@ -107,8 +109,11 @@ class _SSDResnetPpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
if resized_inputs.shape.as_list()[3] == 3:
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
else:
return resized_inputs
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
......
@@ -15,6 +15,7 @@
"""Tests for ssd resnet v1 feature extractors."""
import abc
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
@@ -64,12 +65,15 @@ class SSDResnetPpnFeatureExtractorTestBase(
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3)
test_image = tf.constant(np.random.rand(4, image_height, image_width, 3))
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertAllClose(preprocessed_image,
test_image - [[123.68, 116.779, 103.939]])
with self.test_session() as sess:
test_image_out, preprocessed_image_out = sess.run(
[test_image, preprocessed_image])
self.assertAllClose(preprocessed_image_out,
test_image_out - [[123.68, 116.779, 103.939]])
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
......
@@ -134,26 +134,32 @@ class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
(len(self._prediction_heads[BOX_ENCODINGS]),
len(input_shapes)))
for stack_index, input_shape in enumerate(input_shapes):
net = tf.keras.Sequential(name='PreHeadConvolutions_%d' % stack_index)
self._shared_nets.append(net)
net = []
# Add additional conv layers before the class predictor.
features_depth = static_shape.get_depth(input_shape)
depth = max(min(features_depth, self._max_depth), self._min_depth)
tf.logging.info(
'depth of additional conv before box predictor: {}'.format(depth))
if depth > 0 and self._num_layers_before_predictor > 0:
for i in range(self._num_layers_before_predictor):
net.add(keras.Conv2D(depth, [1, 1],
name='Conv2d_%d_1x1_%d' % (i, depth),
padding='SAME',
**self._conv_hyperparams.params()))
net.add(self._conv_hyperparams.build_batch_norm(
net.append(keras.Conv2D(depth, [1, 1],
name='SharedConvolutions_%d/Conv2d_%d_1x1_%d'
% (stack_index, i, depth),
padding='SAME',
**self._conv_hyperparams.params()))
net.append(self._conv_hyperparams.build_batch_norm(
training=(self._is_training and not self._freeze_batchnorm),
name='Conv2d_%d_1x1_%d_norm' % (i, depth)))
net.add(self._conv_hyperparams.build_activation_layer(
name='Conv2d_%d_1x1_%d_activation' % (i, depth),
name='SharedConvolutions_%d/Conv2d_%d_1x1_%d_norm'
% (stack_index, i, depth)))
net.append(self._conv_hyperparams.build_activation_layer(
name='SharedConvolutions_%d/Conv2d_%d_1x1_%d_activation'
% (stack_index, i, depth),
))
# Until certain bugs are fixed in checkpointable lists,
# this net must be appended only once it's been filled with layers
self._shared_nets.append(net)
self.built = True
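Since the shared convolutions are now held in a plain Python list rather than a tf.keras.Sequential, _predict below applies them in order by hand. A minimal sketch of that pattern:

```python
# Apply a list of Keras layers sequentially, as _predict does below.
def apply_layers(layers, inputs):
  net = inputs
  for layer in layers:
    net = layer(net)
  return net
```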
def _predict(self, image_features):
@@ -175,10 +181,11 @@ class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
"""
predictions = collections.defaultdict(list)
for (index, image_feature) in enumerate(image_features):
for (index, net) in enumerate(image_features):
# Apply shared conv layers before the head predictors.
net = self._shared_nets[index](image_feature)
for layer in self._shared_nets[index]:
net = layer(net)
for head_name in self._prediction_heads:
head_obj = self._prediction_heads[head_name][index]
......
@@ -181,8 +181,8 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
expected_variable_set = set([
'BoxPredictor/PreHeadConvolutions_0/Conv2d_0_1x1_32/bias',
'BoxPredictor/PreHeadConvolutions_0/Conv2d_0_1x1_32/kernel',
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias',
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/kernel',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/bias',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/kernel',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/bias',
......
@@ -34,16 +34,18 @@ class MaskRCNNClassHead(head.Head):
https://arxiv.org/abs/1703.06870
"""
def __init__(self, is_training, num_classes, fc_hyperparams_fn,
use_dropout, dropout_keep_prob):
def __init__(self,
is_training,
num_class_slots,
fc_hyperparams_fn,
use_dropout,
dropout_keep_prob):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
num_class_slots: number of class slots. Note that num_class_slots may or
may not include an implicit background category.
fc_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for fully connected ops.
use_dropout: Option to use dropout or not. Note that a single dropout
@@ -54,7 +56,7 @@ class MaskRCNNClassHead(head.Head):
"""
super(MaskRCNNClassHead, self).__init__()
self._is_training = is_training
self._num_classes = num_classes
self._num_class_slots = num_class_slots
self._fc_hyperparams_fn = fc_hyperparams_fn
self._use_dropout = use_dropout
self._dropout_keep_prob = dropout_keep_prob
@@ -70,7 +72,7 @@ class MaskRCNNClassHead(head.Head):
Returns:
class_predictions_with_background: A float tensor of shape
[batch_size, 1, num_classes + 1] representing the class predictions for
[batch_size, 1, num_class_slots] representing the class predictions for
the proposals.
Raises:
@@ -91,11 +93,12 @@ class MaskRCNNClassHead(head.Head):
with slim.arg_scope(self._fc_hyperparams_fn()):
class_predictions_with_background = slim.fully_connected(
flattened_roi_pooled_features,
self._num_classes + 1,
self._num_class_slots,
activation_fn=None,
scope='ClassPredictor')
class_predictions_with_background = tf.reshape(
class_predictions_with_background, [-1, 1, self._num_classes + 1])
class_predictions_with_background,
[-1, 1, self._num_class_slots])
return class_predictions_with_background
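A hedged example of the new num_class_slots convention: where callers previously passed num_classes=20 and the head appended a background slot internally, they now pass num_class_slots=21 explicitly if a background slot is wanted (the hyperparams function below is an assumed helper):

```python
# Hypothetical construction; fc_hyperparams_fn is an assumed helper.
head = MaskRCNNClassHead(
    is_training=False,
    num_class_slots=21,  # 20 foreground classes + 1 background slot
    fc_hyperparams_fn=fc_hyperparams_fn,
    use_dropout=False,
    dropout_keep_prob=1.0)
# head.predict(...) then returns a [batch_size, 1, 21] tensor.
```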
@@ -104,7 +107,7 @@ class ConvolutionalClassHead(head.Head):
def __init__(self,
is_training,
num_classes,
num_class_slots,
use_dropout,
dropout_keep_prob,
kernel_size,
@@ -115,7 +118,8 @@ class ConvolutionalClassHead(head.Head):
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: Number of classes.
num_class_slots: number of class slots. Note that num_class_slots may or
may not include an implicit background category.
use_dropout: Option to use dropout or not. Note that a single dropout
op is applied here prior to both box and class predictions, which stands
in contrast to the ConvolutionalBoxPredictor below.
@@ -137,7 +141,7 @@ class ConvolutionalClassHead(head.Head):
"""
super(ConvolutionalClassHead, self).__init__()
self._is_training = is_training
self._num_classes = num_classes
self._num_class_slots = num_class_slots
self._use_dropout = use_dropout
self._dropout_keep_prob = dropout_keep_prob
self._kernel_size = kernel_size
@@ -156,12 +160,10 @@ class ConvolutionalClassHead(head.Head):
Returns:
class_predictions_with_background: A float tensor of shape
[batch_size, num_anchors, num_classes + 1] representing the class
[batch_size, num_anchors, num_class_slots] representing the class
predictions for the proposals.
"""
net = features
# Add a slot for the background class.
num_class_slots = self._num_classes + 1
if self._use_dropout:
net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
if self._use_depthwise:
@@ -171,7 +173,7 @@ class ConvolutionalClassHead(head.Head):
rate=1, scope='ClassPredictor_depthwise')
class_predictions_with_background = slim.conv2d(
class_predictions_with_background,
num_predictions_per_location * num_class_slots, [1, 1],
num_predictions_per_location * self._num_class_slots, [1, 1],
activation_fn=None,
normalizer_fn=None,
normalizer_params=None,
@@ -179,7 +181,7 @@ class ConvolutionalClassHead(head.Head):
else:
class_predictions_with_background = slim.conv2d(
net,
num_predictions_per_location * num_class_slots,
num_predictions_per_location * self._num_class_slots,
[self._kernel_size, self._kernel_size],
activation_fn=None,
normalizer_fn=None,
@@ -194,7 +196,8 @@ class ConvolutionalClassHead(head.Head):
if batch_size is None:
batch_size = tf.shape(features)[0]
class_predictions_with_background = tf.reshape(
class_predictions_with_background, [batch_size, -1, num_class_slots])
class_predictions_with_background,
[batch_size, -1, self._num_class_slots])
return class_predictions_with_background
@@ -208,7 +211,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
"""
def __init__(self,
num_classes,
num_class_slots,
kernel_size=3,
class_prediction_bias_init=0.0,
use_dropout=False,
@@ -218,10 +221,8 @@ class WeightSharedConvolutionalClassHead(head.Head):
"""Constructor.
Args:
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
num_class_slots: number of class slots. Note that num_class_slots may or
may not include an implicit background category.
kernel_size: Size of final convolution kernel.
class_prediction_bias_init: constant value to initialize bias of the last
conv2d layer before class prediction.
@@ -233,7 +234,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
as inputs and returns tensors).
"""
super(WeightSharedConvolutionalClassHead, self).__init__()
self._num_classes = num_classes
self._num_class_slots = num_class_slots
self._kernel_size = kernel_size
self._class_prediction_bias_init = class_prediction_bias_init
self._use_dropout = use_dropout
@@ -252,12 +253,10 @@ class WeightSharedConvolutionalClassHead(head.Head):
Returns:
class_predictions_with_background: A tensor of shape
[batch_size, num_anchors, num_classes + 1] representing the class
[batch_size, num_anchors, num_class_slots] representing the class
predictions for the proposals.
"""
class_predictions_net = features
num_class_slots = self._num_classes + 1
# Add a slot for the background class.
if self._use_dropout:
class_predictions_net = slim.dropout(
class_predictions_net, keep_prob=self._dropout_keep_prob)
@@ -267,7 +266,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
conv_op = slim.conv2d
class_predictions_with_background = conv_op(
class_predictions_net,
num_predictions_per_location * num_class_slots,
num_predictions_per_location * self._num_class_slots,
[self._kernel_size, self._kernel_size],
activation_fn=None, stride=1, padding='SAME',
normalizer_fn=None,
@@ -280,5 +279,6 @@ class WeightSharedConvolutionalClassHead(head.Head):
class_predictions_with_background = self._score_converter_fn(
class_predictions_with_background)
class_predictions_with_background = tf.reshape(
class_predictions_with_background, [batch_size, -1, num_class_slots])
class_predictions_with_background,
[batch_size, -1, self._num_class_slots])
return class_predictions_with_background
@@ -46,7 +46,7 @@ class MaskRCNNClassHeadTest(test_case.TestCase):
def test_prediction_size(self):
class_prediction_head = class_head.MaskRCNNClassHead(
is_training=False,
num_classes=20,
num_class_slots=20,
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=True,
dropout_keep_prob=0.5)
@@ -54,7 +54,7 @@ class MaskRCNNClassHeadTest(test_case.TestCase):
[64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
prediction = class_prediction_head.predict(
features=roi_pooled_features, num_predictions_per_location=1)
self.assertAllEqual([64, 1, 21], prediction.get_shape().as_list())
self.assertAllEqual([64, 1, 20], prediction.get_shape().as_list())
class ConvolutionalClassPredictorTest(test_case.TestCase):
@@ -80,7 +80,7 @@ class ConvolutionalClassPredictorTest(test_case.TestCase):
def test_prediction_size(self):
class_prediction_head = class_head.ConvolutionalClassHead(
is_training=True,
num_classes=20,
num_class_slots=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3)
@@ -89,7 +89,7 @@ class ConvolutionalClassPredictorTest(test_case.TestCase):
class_predictions = class_prediction_head.predict(
features=image_feature,
num_predictions_per_location=1)
self.assertAllEqual([64, 323, 21],
self.assertAllEqual([64, 323, 20],
class_predictions.get_shape().as_list())
@@ -115,13 +115,13 @@ class WeightSharedConvolutionalClassPredictorTest(test_case.TestCase):
def test_prediction_size(self):
class_prediction_head = (
class_head.WeightSharedConvolutionalClassHead(num_classes=20))
class_head.WeightSharedConvolutionalClassHead(num_class_slots=20))
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
class_predictions = class_prediction_head.predict(
features=image_feature,
num_predictions_per_location=1)
self.assertAllEqual([64, 323, 21], class_predictions.get_shape().as_list())
self.assertAllEqual([64, 323, 20], class_predictions.get_shape().as_list())
if __name__ == '__main__':
......
@@ -91,7 +91,7 @@ class ConvolutionalBoxHead(head.KerasHead):
tf.keras.layers.Conv2D(
num_predictions_per_location * self._box_code_size, [1, 1],
name='BoxEncodingPredictor',
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
else:
self._box_encoder_layers.append(
tf.keras.layers.Conv2D(
@@ -99,7 +99,7 @@ class ConvolutionalBoxHead(head.KerasHead):
[self._kernel_size, self._kernel_size],
padding='SAME',
name='BoxEncodingPredictor',
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
def _predict(self, features):
"""Predicts boxes.
......
@@ -29,7 +29,7 @@ class ConvolutionalClassHead(head.KerasHead):
def __init__(self,
is_training,
num_classes,
num_class_slots,
use_dropout,
dropout_keep_prob,
kernel_size,
@@ -43,7 +43,8 @@ class ConvolutionalClassHead(head.KerasHead):
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: Number of classes.
num_class_slots: number of class slots. Note that num_class_slots may or
may not include an implicit background category.
use_dropout: Option to use dropout or not. Note that a single dropout
op is applied here prior to both box and class predictions, which stands
in contrast to the ConvolutionalBoxPredictor below.
@@ -73,13 +74,12 @@ class ConvolutionalClassHead(head.KerasHead):
"""
super(ConvolutionalClassHead, self).__init__(name=name)
self._is_training = is_training
self._num_classes = num_classes
self._use_dropout = use_dropout
self._dropout_keep_prob = dropout_keep_prob
self._kernel_size = kernel_size
self._class_prediction_bias_init = class_prediction_bias_init
self._use_depthwise = use_depthwise
self._num_class_slots = self._num_classes + 1
self._num_class_slots = num_class_slots
self._class_predictor_layers = []
@@ -110,7 +110,7 @@ class ConvolutionalClassHead(head.KerasHead):
tf.keras.layers.Conv2D(
num_predictions_per_location * self._num_class_slots, [1, 1],
name='ClassPredictor',
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
else:
self._class_predictor_layers.append(
tf.keras.layers.Conv2D(
@@ -120,7 +120,7 @@ class ConvolutionalClassHead(head.KerasHead):
name='ClassPredictor',
bias_initializer=tf.constant_initializer(
self._class_prediction_bias_init),
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
def _predict(self, features):
"""Predicts boxes.
@@ -131,7 +131,7 @@ class ConvolutionalClassHead(head.KerasHead):
Returns:
class_predictions_with_background: A float tensor of shape
[batch_size, num_anchors, num_classes + 1] representing the class
[batch_size, num_anchors, num_class_slots] representing the class
predictions for the proposals.
"""
# num_class_slots already accounts for any implicit background slot.
......
@@ -45,7 +45,7 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
conv_hyperparams = self._build_conv_hyperparams()
class_prediction_head = keras_class_head.ConvolutionalClassHead(
is_training=True,
num_classes=20,
num_class_slots=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
@@ -56,7 +56,7 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
class_predictions = class_prediction_head(image_feature,)
self.assertAllEqual([64, 323, 21],
self.assertAllEqual([64, 323, 20],
class_predictions.get_shape().as_list())
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
......
@@ -124,7 +124,7 @@ class ConvolutionalMaskHead(head.KerasHead):
tf.keras.layers.Conv2D(
num_predictions_per_location * num_mask_channels, [1, 1],
name='MaskPredictor',
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
else:
self._mask_predictor_layers.append(
tf.keras.layers.Conv2D(
@@ -132,7 +132,7 @@ class ConvolutionalMaskHead(head.KerasHead):
[self._kernel_size, self._kernel_size],
padding='SAME',
name='MaskPredictor',
**conv_hyperparams.params(activation=None)))
**conv_hyperparams.params(use_bias=True)))
def _predict(self, features):
"""Predicts boxes.
......
@@ -23,6 +23,7 @@ import math
import tensorflow as tf
from object_detection.predictors.heads import head
from object_detection.utils import ops
slim = tf.contrib.slim
@@ -41,7 +42,8 @@ class MaskRCNNMaskHead(head.Head):
mask_width=14,
mask_prediction_num_conv_layers=2,
mask_prediction_conv_depth=256,
masks_are_class_agnostic=False):
masks_are_class_agnostic=False,
convolve_then_upsample=False):
"""Constructor.
Args:
@@ -62,6 +64,10 @@
image features.
masks_are_class_agnostic: Boolean determining if the mask-head is
class-agnostic or not.
convolve_then_upsample: Whether to apply convolutions on mask features
before upsampling using nearest neighbor resizing. Otherwise, mask
features are resized to [`mask_height`, `mask_width`] using bilinear
resizing before applying convolutions.
Raises:
ValueError: conv_hyperparams_fn is None.
@@ -74,6 +80,7 @@
self._mask_prediction_num_conv_layers = mask_prediction_num_conv_layers
self._mask_prediction_conv_depth = mask_prediction_conv_depth
self._masks_are_class_agnostic = masks_are_class_agnostic
self._convolve_then_upsample = convolve_then_upsample
if conv_hyperparams_fn is None:
raise ValueError('conv_hyperparams_fn is None.')
@@ -135,17 +142,30 @@
num_conv_channels = self._get_mask_predictor_conv_depth(
num_feature_channels, self._num_classes)
with slim.arg_scope(self._conv_hyperparams_fn()):
upsampled_features = tf.image.resize_bilinear(
features, [self._mask_height, self._mask_width],
align_corners=True)
if not self._convolve_then_upsample:
features = tf.image.resize_bilinear(
features, [self._mask_height, self._mask_width],
align_corners=True)
for _ in range(self._mask_prediction_num_conv_layers - 1):
upsampled_features = slim.conv2d(
upsampled_features,
features = slim.conv2d(
features,
num_outputs=num_conv_channels,
kernel_size=[3, 3])
if self._convolve_then_upsample:
# Replace Transposed Convolution with a Nearest Neighbor upsampling step
# followed by 3x3 convolution.
height_scale = self._mask_height / features.shape[1].value
width_scale = self._mask_width / features.shape[2].value
features = ops.nearest_neighbor_upsampling(
features, height_scale=height_scale, width_scale=width_scale)
features = slim.conv2d(
features,
num_outputs=num_conv_channels,
kernel_size=[3, 3])
num_masks = 1 if self._masks_are_class_agnostic else self._num_classes
mask_predictions = slim.conv2d(
upsampled_features,
features,
num_outputs=num_masks,
activation_fn=None,
normalizer_fn=None,
......
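The convolve-then-upsample path above depends on ops.nearest_neighbor_upsampling; a numpy sketch of nearest-neighbor upsampling at an integer scale (assumed behavior of that op):

```python
import numpy as np

# Each spatial cell is repeated `scale` times along height and width.
features = np.arange(4.0).reshape(1, 2, 2, 1)
scale = 2
upsampled = features.repeat(scale, axis=1).repeat(scale, axis=2)
print(upsampled.shape)  # (1, 4, 4, 1)
```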
@@ -58,6 +58,22 @@ class MaskRCNNMaskHeadTest(test_case.TestCase):
features=roi_pooled_features, num_predictions_per_location=1)
self.assertAllEqual([64, 1, 20, 14, 14], prediction.get_shape().as_list())
def test_prediction_size_with_convolve_then_upsample(self):
mask_prediction_head = mask_head.MaskRCNNMaskHead(
num_classes=20,
conv_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
mask_height=28,
mask_width=28,
mask_prediction_num_conv_layers=2,
mask_prediction_conv_depth=256,
masks_are_class_agnostic=True,
convolve_then_upsample=True)
roi_pooled_features = tf.random_uniform(
[64, 14, 14, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
prediction = mask_prediction_head.predict(
features=roi_pooled_features, num_predictions_per_location=1)
self.assertAllEqual([64, 1, 1, 28, 28], prediction.get_shape().as_list())
class ConvolutionalMaskPredictorTest(test_case.TestCase):
......
@@ -138,6 +138,7 @@ message WeightSharedConvolutionalBoxPredictor {
// TODO(alirezafathi): Refactor the proto file to be able to configure mask rcnn
// head easily.
// Next id: 15
message MaskRCNNBoxPredictor {
// Hyperparameters for fully connected ops used in the box predictor.
optional Hyperparams fc_hyperparams = 1;
@@ -178,6 +179,12 @@ message MaskRCNNBoxPredictor {
// Whether to use one box for all classes rather than a different box for each
// class.
optional bool share_box_across_classes = 13 [default = false];
// Whether to apply convolutions on mask features before upsampling using
// nearest neighbor resizing.
// By default, mask features are resized to [`mask_height`, `mask_width`]
// before applying convolutions and predicting masks.
optional bool convolve_then_upsample_masks = 14 [default = false];
}
message RfcnBoxPredictor {
......
@@ -164,6 +164,10 @@ message FasterRcnn {
// Whether the masks present in groundtruth should be resized in the model to
// match the image size.
optional bool resize_masks = 36 [default = true];
// If true, uses implementations of ops with static shape guarantees when
// running evaluation (specifically, when is_training is false).
optional bool use_static_shapes_for_eval = 37 [default = false];
}
......
@@ -155,6 +155,9 @@ message RandomCropImage {
// value, it is removed from the new image.
optional float overlap_thresh = 6 [default=0.3];
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 8 [default=true];
// Probability of keeping the original image.
optional float random_coef = 7 [default=0.0];
}
@@ -194,6 +197,9 @@ message RandomCropPadImage {
// value, it is removed from the new image.
optional float overlap_thresh = 6 [default=0.3];
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 11 [default=true];
// Probability of keeping the original image during the crop operation.
optional float random_coef = 7 [default=0.0];
@@ -217,6 +223,9 @@ message RandomCropToAspectRatio {
// ratio between a cropped bounding box and the original is less than this
// value, it is removed from the new image.
optional float overlap_thresh = 2 [default=0.3];
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 3 [default=true];
}
// Randomly adds black square patches to an image.
@@ -285,6 +294,9 @@ message SSDRandomCropOperation {
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 8 [default=true];
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
@@ -315,6 +327,9 @@ message SSDRandomCropPadOperation {
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 13 [default=true];
// Probability a crop operation is skipped.
optional float random_coef = 7;
@@ -353,6 +368,9 @@ message SSDRandomCropFixedAspectRatioOperation {
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 8 [default=true];
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
@@ -387,6 +405,9 @@ message SSDRandomCropPadFixedAspectRatioOperation {
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Whether to clip the boxes to the cropped image.
optional bool clip_boxes = 8 [default=true];
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
......