Commit 98db9b25 authored by Yeqing Li, committed by A. Unique TensorFlower
Browse files

Adds support for separable convolution.

PiperOrigin-RevId: 279113063
parent b2bf29cf
...@@ -20,6 +20,7 @@ from __future__ import print_function ...@@ -20,6 +20,7 @@ from __future__ import print_function
from official.vision.detection.modeling.architecture import fpn from official.vision.detection.modeling.architecture import fpn
from official.vision.detection.modeling.architecture import heads from official.vision.detection.modeling.architecture import heads
from official.vision.detection.modeling.architecture import identity
from official.vision.detection.modeling.architecture import nn_ops from official.vision.detection.modeling.architecture import nn_ops
from official.vision.detection.modeling.architecture import resnet from official.vision.detection.modeling.architecture import resnet
...@@ -65,7 +66,10 @@ def multilevel_features_generator(params): ...@@ -65,7 +66,10 @@ def multilevel_features_generator(params):
min_level=fpn_params.min_level, min_level=fpn_params.min_level,
max_level=fpn_params.max_level, max_level=fpn_params.max_level,
fpn_feat_dims=fpn_params.fpn_feat_dims, fpn_feat_dims=fpn_params.fpn_feat_dims,
use_separable_conv=fpn_params.use_separable_conv,
batch_norm_relu=batch_norm_relu_generator(fpn_params.batch_norm)) batch_norm_relu=batch_norm_relu_generator(fpn_params.batch_norm))
elif params.architecture.multilevel_features == 'identity':
fpn_fn = identity.Identity()
else: else:
raise ValueError('The multi-level feature model %s is not supported.' raise ValueError('The multi-level feature model %s is not supported.'
% params.architecture.multilevel_features) % params.architecture.multilevel_features)
...@@ -81,6 +85,7 @@ def retinanet_head_generator(params): ...@@ -81,6 +85,7 @@ def retinanet_head_generator(params):
params.anchors_per_location, params.anchors_per_location,
params.retinanet_head_num_convs, params.retinanet_head_num_convs,
params.retinanet_head_num_filters, params.retinanet_head_num_filters,
params.use_separable_conv,
batch_norm_relu=batch_norm_relu_generator(params.batch_norm)) batch_norm_relu=batch_norm_relu_generator(params.batch_norm))
...@@ -110,36 +115,15 @@ def mask_rcnn_head_generator(params): ...@@ -110,36 +115,15 @@ def mask_rcnn_head_generator(params):
def shapeprior_head_generator(params): def shapeprior_head_generator(params):
"""Generator function for RetinaNet head architecture.""" """Generator function for Shapemask head architecture."""
return heads.ShapemaskPriorHead( raise NotImplementedError('Unimplemented')
params.num_classes,
params.num_downsample_channels,
params.mask_crop_size,
params.use_category_for_mask,
params.num_of_instances,
params.min_mask_level,
params.max_mask_level,
params.num_clusters,
params.temperature,
params.shape_prior_path)
def coarsemask_head_generator(params): def coarsemask_head_generator(params):
"""Generator function for RetinaNet head architecture.""" """Generator function for Shapemask head architecture."""
return heads.ShapemaskCoarsemaskHead( raise NotImplementedError('Unimplemented')
params.num_classes,
params.num_downsample_channels,
params.mask_crop_size,
params.use_category_for_mask,
params.num_convs)
def finemask_head_generator(params): def finemask_head_generator(params):
"""Generator function for RetinaNet head architecture.""" """Generator function for Shapemask head architecture."""
return heads.ShapemaskFinemaskHead( raise NotImplementedError('Unimplemented')
params.num_classes,
params.num_downsample_channels,
params.mask_crop_size,
params.num_convs,
params.coarse_mask_thr,
params.gt_upsample_scale)
...@@ -38,6 +38,7 @@ class Fpn(object): ...@@ -38,6 +38,7 @@ class Fpn(object):
min_level=3, min_level=3,
max_level=7, max_level=7,
fpn_feat_dims=256, fpn_feat_dims=256,
use_separable_conv=False,
batch_norm_relu=nn_ops.BatchNormRelu): batch_norm_relu=nn_ops.BatchNormRelu):
"""FPN initialization function. """FPN initialization function.
...@@ -45,17 +46,62 @@ class Fpn(object): ...@@ -45,17 +46,62 @@ class Fpn(object):
min_level: `int` minimum level in FPN output feature maps. min_level: `int` minimum level in FPN output feature maps.
max_level: `int` maximum level in FPN output feature maps. max_level: `int` maximum level in FPN output feature maps.
fpn_feat_dims: `int` number of filters in FPN layers. fpn_feat_dims: `int` number of filters in FPN layers.
use_separable_conv: `bool`, if True use separable convolution for
convolution in FPN layers.
batch_norm_relu: an operation that includes a batch normalization layer batch_norm_relu: an operation that includes a batch normalization layer
followed by a relu layer(optional). followed by a relu layer(optional).
""" """
self._min_level = min_level self._min_level = min_level
self._max_level = max_level self._max_level = max_level
self._fpn_feat_dims = fpn_feat_dims self._fpn_feat_dims = fpn_feat_dims
self._batch_norm_relu = batch_norm_relu
self._batch_norm_relus = {} self._batch_norm_relus = {}
self._lateral_conv2d_op = {}
self._post_hoc_conv2d_op = {}
self._coarse_conv2d_op = {}
for level in range(self._min_level, self._max_level + 1): for level in range(self._min_level, self._max_level + 1):
self._batch_norm_relus[level] = batch_norm_relu( self._batch_norm_relus[level] = batch_norm_relu(
relu=False, name='p%d-bn' % level) relu=False, name='p%d-bn' % level)
if use_separable_conv:
self._lateral_conv2d_op[level] = tf.keras.layers.SeparableConv2D(
filters=self._fpn_feat_dims,
kernel_size=(1, 1),
padding='same',
depth_multiplier=1,
name='l%d' % level)
self._post_hoc_conv2d_op[level] = tf.keras.layers.SeparableConv2D(
filters=self._fpn_feat_dims,
strides=(1, 1),
kernel_size=(3, 3),
padding='same',
depth_multiplier=1,
name='post_hoc_d%d' % level)
self._coarse_conv2d_op[level] = tf.keras.layers.SeparableConv2D(
filters=self._fpn_feat_dims,
strides=(2, 2),
kernel_size=(3, 3),
padding='same',
depth_multiplier=1,
name='p%d' % level)
else:
self._lateral_conv2d_op[level] = tf.keras.layers.Conv2D(
filters=self._fpn_feat_dims,
kernel_size=(1, 1),
padding='same',
name='l%d' % level)
self._post_hoc_conv2d_op[level] = tf.keras.layers.Conv2D(
filters=self._fpn_feat_dims,
strides=(1, 1),
kernel_size=(3, 3),
padding='same',
name='post_hoc_d%d' % level)
self._coarse_conv2d_op[level] = tf.keras.layers.Conv2D(
filters=self._fpn_feat_dims,
strides=(2, 2),
kernel_size=(3, 3),
padding='same',
name='p%d' % level)
def __call__(self, multilevel_features, is_training=None): def __call__(self, multilevel_features, is_training=None):
"""Returns the FPN features for a given multilevel features. """Returns the FPN features for a given multilevel features.
...@@ -81,11 +127,7 @@ class Fpn(object): ...@@ -81,11 +127,7 @@ class Fpn(object):
# Adds lateral connections. # Adds lateral connections.
feats_lateral = {} feats_lateral = {}
for level in range(self._min_level, backbone_max_level + 1): for level in range(self._min_level, backbone_max_level + 1):
feats_lateral[level] = tf.keras.layers.Conv2D( feats_lateral[level] = self._lateral_conv2d_op[level](
filters=self._fpn_feat_dims,
kernel_size=(1, 1),
padding='same',
name='l%d' % level)(
multilevel_features[level]) multilevel_features[level])
# Adds top-down path. # Adds top-down path.
...@@ -96,26 +138,14 @@ class Fpn(object): ...@@ -96,26 +138,14 @@ class Fpn(object):
# Adds post-hoc 3x3 convolution kernel. # Adds post-hoc 3x3 convolution kernel.
for level in range(self._min_level, backbone_max_level + 1): for level in range(self._min_level, backbone_max_level + 1):
feats[level] = tf.keras.layers.Conv2D( feats[level] = self._post_hoc_conv2d_op[level](feats[level])
filters=self._fpn_feat_dims,
strides=(1, 1),
kernel_size=(3, 3),
padding='same',
name='post_hoc_d%d' % level)(
feats[level])
# Adds coarser FPN levels introduced for RetinaNet. # Adds coarser FPN levels introduced for RetinaNet.
for level in range(backbone_max_level + 1, self._max_level + 1): for level in range(backbone_max_level + 1, self._max_level + 1):
feats_in = feats[level - 1] feats_in = feats[level - 1]
if level > backbone_max_level + 1: if level > backbone_max_level + 1:
feats_in = tf.nn.relu(feats_in) feats_in = tf.nn.relu(feats_in)
feats[level] = tf.keras.layers.Conv2D( feats[level] = self._coarse_conv2d_op[level](feats_in)
filters=self._fpn_feat_dims,
strides=(2, 2),
kernel_size=(3, 3),
padding='same',
name='p%d' % level)(
feats_in)
# Adds batch_norm layer. # Adds batch_norm layer.
for level in range(self._min_level, self._max_level + 1): for level in range(self._min_level, self._max_level + 1):
feats[level] = self._batch_norm_relus[level]( feats[level] = self._batch_norm_relus[level](
......
...@@ -302,6 +302,7 @@ class RetinanetHead(object): ...@@ -302,6 +302,7 @@ class RetinanetHead(object):
anchors_per_location, anchors_per_location,
num_convs=4, num_convs=4,
num_filters=256, num_filters=256,
use_separable_conv=False,
batch_norm_relu=nn_ops.BatchNormRelu): batch_norm_relu=nn_ops.BatchNormRelu):
"""Initialize params to build RetinaNet head. """Initialize params to build RetinaNet head.
...@@ -313,6 +314,8 @@ class RetinanetHead(object): ...@@ -313,6 +314,8 @@ class RetinanetHead(object):
num_convs: `int` number of stacked convolution before the last prediction num_convs: `int` number of stacked convolution before the last prediction
layer. layer.
num_filters: `int` number of filters used in the head architecture. num_filters: `int` number of filters used in the head architecture.
use_separable_conv: `bool` to indicate whether to use separable
convolution.
batch_norm_relu: an operation that includes a batch normalization layer batch_norm_relu: an operation that includes a batch normalization layer
followed by a relu layer(optional). followed by a relu layer(optional).
""" """
...@@ -324,6 +327,7 @@ class RetinanetHead(object): ...@@ -324,6 +327,7 @@ class RetinanetHead(object):
self._num_convs = num_convs self._num_convs = num_convs
self._num_filters = num_filters self._num_filters = num_filters
self._use_separable_conv = use_separable_conv
with tf.name_scope('class_net') as scope_name: with tf.name_scope('class_net') as scope_name:
self._class_name_scope = tf.name_scope(scope_name) self._class_name_scope = tf.name_scope(scope_name)
...@@ -340,6 +344,14 @@ class RetinanetHead(object): ...@@ -340,6 +344,14 @@ class RetinanetHead(object):
def _build_class_net_layers(self, batch_norm_relu): def _build_class_net_layers(self, batch_norm_relu):
"""Build re-usable layers for class prediction network.""" """Build re-usable layers for class prediction network."""
if self._use_separable_conv:
self._class_predict = tf.keras.layers.SeparableConv2D(
self._num_classes * self._anchors_per_location,
kernel_size=(3, 3),
bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
padding='same',
name='class-predict')
else:
self._class_predict = tf.keras.layers.Conv2D( self._class_predict = tf.keras.layers.Conv2D(
self._num_classes * self._anchors_per_location, self._num_classes * self._anchors_per_location,
kernel_size=(3, 3), kernel_size=(3, 3),
...@@ -350,6 +362,16 @@ class RetinanetHead(object): ...@@ -350,6 +362,16 @@ class RetinanetHead(object):
self._class_conv = [] self._class_conv = []
self._class_batch_norm_relu = {} self._class_batch_norm_relu = {}
for i in range(self._num_convs): for i in range(self._num_convs):
if self._use_separable_conv:
self._class_conv.append(
tf.keras.layers.SeparableConv2D(
self._num_filters,
kernel_size=(3, 3),
bias_initializer=tf.zeros_initializer(),
activation=None,
padding='same',
name='class-' + str(i)))
else:
self._class_conv.append( self._class_conv.append(
tf.keras.layers.Conv2D( tf.keras.layers.Conv2D(
self._num_filters, self._num_filters,
...@@ -366,6 +388,14 @@ class RetinanetHead(object): ...@@ -366,6 +388,14 @@ class RetinanetHead(object):
def _build_box_net_layers(self, batch_norm_relu): def _build_box_net_layers(self, batch_norm_relu):
"""Build re-usable layers for box prediction network.""" """Build re-usable layers for box prediction network."""
if self._use_separable_conv:
self._box_predict = tf.keras.layers.SeparableConv2D(
4 * self._anchors_per_location,
kernel_size=(3, 3),
bias_initializer=tf.zeros_initializer(),
padding='same',
name='box-predict')
else:
self._box_predict = tf.keras.layers.Conv2D( self._box_predict = tf.keras.layers.Conv2D(
4 * self._anchors_per_location, 4 * self._anchors_per_location,
kernel_size=(3, 3), kernel_size=(3, 3),
...@@ -376,6 +406,16 @@ class RetinanetHead(object): ...@@ -376,6 +406,16 @@ class RetinanetHead(object):
self._box_conv = [] self._box_conv = []
self._box_batch_norm_relu = {} self._box_batch_norm_relu = {}
for i in range(self._num_convs): for i in range(self._num_convs):
if self._use_separable_conv:
self._box_conv.append(
tf.keras.layers.SeparableConv2D(
self._num_filters,
kernel_size=(3, 3),
activation=None,
bias_initializer=tf.zeros_initializer(),
padding='same',
name='box-' + str(i)))
else:
self._box_conv.append( self._box_conv.append(
tf.keras.layers.Conv2D( tf.keras.layers.Conv2D(
self._num_filters, self._num_filters,
......
...@@ -138,7 +138,7 @@ class Model(object): ...@@ -138,7 +138,7 @@ class Model(object):
return l2_weight_decay * tf.add_n([ return l2_weight_decay * tf.add_n([
tf.nn.l2_loss(v) tf.nn.l2_loss(v)
for v in self._keras_model.trainable_variables for v in self._keras_model.trainable_variables
if 'batch_normalization' not in v.name if 'batch_normalization' not in v.name and 'bias' not in v.name
]) ])
def make_restore_checkpoint_fn(self): def make_restore_checkpoint_fn(self):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment