Commit 3ce2f61b authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

Merge branch 'master' of https://github.com/tensorflow/models into context_tf2

parents bb16d5ca 8e9296ff
......@@ -21,8 +21,8 @@ from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import faster_rcnn_resnet_v1_fpn_keras_feature_extractor as frcnn_res_fpn
from object_detection.utils import tf_version
from object_detection.protos import hyperparams_pb2
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
......@@ -40,7 +40,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractorTest(tf.test.TestCase):
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _build_feature_extractor(self):
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSD Keras-based EfficientNet + BiFPN (EfficientDet) Feature Extractor."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl import logging
from six.moves import range
from six.moves import zip
import tensorflow.compat.v2 as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import bidirectional_feature_pyramid_generators as bifpn_generators
from object_detection.utils import ops
from object_detection.utils import shape_utils
from object_detection.utils import tf_version
# pylint: disable=g-import-not-at-top
if tf_version.is_tf2():
from official.vision.image_classification.efficientnet import efficientnet_model
_EFFICIENTNET_LEVEL_ENDPOINTS = {
1: 'stack_0/block_0/project_bn',
2: 'stack_1/block_1/add',
3: 'stack_2/block_1/add',
4: 'stack_4/block_2/add',
5: 'stack_6/block_0/project_bn',
}
class SSDEfficientNetBiFPNKerasFeatureExtractor(
ssd_meta_arch.SSDKerasFeatureExtractor):
"""SSD Keras-based EfficientNetBiFPN (EfficientDet) Feature Extractor."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
bifpn_min_level,
bifpn_max_level,
bifpn_num_iterations,
bifpn_num_filters,
bifpn_combine_method,
efficientnet_version,
use_explicit_padding=None,
use_depthwise=None,
override_base_feature_extractor_hyperparams=None,
name=None):
"""SSD Keras-based EfficientNetBiFPN (EfficientDet) feature extractor.
Args:
is_training: whether the network is in training mode.
depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
multiplier for the feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: whether to freeze batch norm parameters during training
or not. When training with a small batch size (e.g. 1), it is desirable
to freeze batch norm update and use pretrained batch norm params.
inplace_batchnorm_update: whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
bifpn_min_level: the highest resolution feature map to use in BiFPN. The
valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
respectively.
bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
BiFPN constructions uses features maps starting from bifpn_min_level
upto the bifpn_max_level. In the case that there are not enough feature
maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of BiFPN
levels.
bifpn_num_iterations: number of BiFPN iterations. Overrided if
efficientdet_version is provided.
bifpn_num_filters: number of filters (channels) in all BiFPN layers.
Overrided if efficientdet_version is provided.
bifpn_combine_method: the method used to combine BiFPN nodes.
efficientnet_version: the EfficientNet version to use for this feature
extractor's backbone.
use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
explicit padding when extracting features.
use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
convolutions when inputs to a node have a differing number of channels,
and use separable convolutions after combine operations.
override_base_feature_extractor_hyperparams: unsupported. Whether to
override hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: a string name scope to assign to the model. If 'None', Keras will
auto-generate one from the class name.
"""
super(SSDEfficientNetBiFPNKerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
use_explicit_padding=None,
use_depthwise=None,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
if depth_multiplier != 1.0:
raise ValueError('EfficientNetBiFPN does not support a non-default '
'depth_multiplier.')
if use_explicit_padding:
raise ValueError('EfficientNetBiFPN does not support explicit padding.')
if use_depthwise:
raise ValueError('EfficientNetBiFPN does not support use_depthwise.')
if override_base_feature_extractor_hyperparams:
raise ValueError('EfficientNetBiFPN does not support '
'override_base_feature_extractor_hyperparams.')
self._bifpn_min_level = bifpn_min_level
self._bifpn_max_level = bifpn_max_level
self._bifpn_num_iterations = bifpn_num_iterations
self._bifpn_num_filters = max(bifpn_num_filters, min_depth)
self._bifpn_node_params = {'combine_method': bifpn_combine_method}
self._efficientnet_version = efficientnet_version
logging.info('EfficientDet EfficientNet backbone version: %s',
self._efficientnet_version)
logging.info('EfficientDet BiFPN num filters: %d', self._bifpn_num_filters)
logging.info('EfficientDet BiFPN num iterations: %d',
self._bifpn_num_iterations)
self._backbone_max_level = min(
max(_EFFICIENTNET_LEVEL_ENDPOINTS.keys()), bifpn_max_level)
self._output_layer_names = [
_EFFICIENTNET_LEVEL_ENDPOINTS[i]
for i in range(bifpn_min_level, self._backbone_max_level + 1)]
self._output_layer_alias = [
'level_{}'.format(i)
for i in range(bifpn_min_level, self._backbone_max_level + 1)]
# Initialize the EfficientNet backbone.
# Note, this is currently done in the init method rather than in the build
# method, since doing so introduces an error which is not well understood.
efficientnet_base = efficientnet_model.EfficientNet.from_name(
model_name=self._efficientnet_version,
overrides={'rescale_input': False})
outputs = [efficientnet_base.get_layer(output_layer_name).output
for output_layer_name in self._output_layer_names]
self._efficientnet = tf.keras.Model(
inputs=efficientnet_base.inputs, outputs=outputs)
self.classification_backbone = efficientnet_base
self._bifpn_stage = None
def build(self, input_shape):
self._bifpn_stage = bifpn_generators.KerasBiFpnFeatureMaps(
bifpn_num_iterations=self._bifpn_num_iterations,
bifpn_num_filters=self._bifpn_num_filters,
fpn_min_level=self._bifpn_min_level,
fpn_max_level=self._bifpn_max_level,
input_max_level=self._backbone_max_level,
is_training=self._is_training,
conv_hyperparams=self._conv_hyperparams,
freeze_batchnorm=self._freeze_batchnorm,
bifpn_node_params=self._bifpn_node_params,
name='bifpn')
self.built = True
def preprocess(self, inputs):
"""SSD preprocessing.
Channel-wise mean subtraction and scaling.
Args:
inputs: a [batch, height, width, channels] float tensor representing a
batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
if inputs.shape.as_list()[3] == 3:
# Input images are expected to be in the range [0, 255].
channel_offset = [0.485, 0.456, 0.406]
channel_scale = [0.229, 0.224, 0.225]
return ((inputs / 255.0) - [[channel_offset]]) / [[channel_scale]]
else:
return inputs
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs = shape_utils.check_min_image_dim(
129, preprocessed_inputs)
base_feature_maps = self._efficientnet(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
output_feature_map_dict = self._bifpn_stage(
list(zip(self._output_layer_alias, base_feature_maps)))
return list(output_feature_map_dict.values())
class SSDEfficientNetB0BiFPNKerasFeatureExtractor(
SSDEfficientNetBiFPNKerasFeatureExtractor):
"""SSD Keras EfficientNet-b0 BiFPN (EfficientDet-d0) Feature Extractor."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
bifpn_min_level=3,
bifpn_max_level=7,
bifpn_num_iterations=3,
bifpn_num_filters=64,
bifpn_combine_method='fast_attention',
use_explicit_padding=None,
use_depthwise=None,
override_base_feature_extractor_hyperparams=None,
name='EfficientDet-D0'):
"""SSD Keras EfficientNet-b0 BiFPN (EfficientDet-d0) Feature Extractor.
Args:
is_training: whether the network is in training mode.
depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
multiplier for the feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: whether to freeze batch norm parameters during training
or not. When training with a small batch size (e.g. 1), it is desirable
to freeze batch norm update and use pretrained batch norm params.
inplace_batchnorm_update: whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
bifpn_min_level: the highest resolution feature map to use in BiFPN. The
valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
respectively.
bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
BiFPN constructions uses features maps starting from bifpn_min_level
upto the bifpn_max_level. In the case that there are not enough feature
maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of BiFPN
levels.
bifpn_num_iterations: number of BiFPN iterations. Overrided if
efficientdet_version is provided.
bifpn_num_filters: number of filters (channels) in all BiFPN layers.
Overrided if efficientdet_version is provided.
bifpn_combine_method: the method used to combine BiFPN nodes.
use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
explicit padding when extracting features.
use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
convolutions when inputs to a node have a differing number of channels,
and use separable convolutions after combine operations.
override_base_feature_extractor_hyperparams: unsupported. Whether to
override hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: a string name scope to assign to the model. If 'None', Keras will
auto-generate one from the class name.
"""
super(SSDEfficientNetB0BiFPNKerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
bifpn_min_level=bifpn_min_level,
bifpn_max_level=bifpn_max_level,
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=bifpn_num_filters,
bifpn_combine_method=bifpn_combine_method,
efficientnet_version='efficientnet-b0',
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
class SSDEfficientNetB1BiFPNKerasFeatureExtractor(
SSDEfficientNetBiFPNKerasFeatureExtractor):
"""SSD Keras EfficientNet-b1 BiFPN (EfficientDet-d1) Feature Extractor."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
bifpn_min_level=3,
bifpn_max_level=7,
bifpn_num_iterations=4,
bifpn_num_filters=88,
bifpn_combine_method='fast_attention',
use_explicit_padding=None,
use_depthwise=None,
override_base_feature_extractor_hyperparams=None,
name='EfficientDet-D1'):
"""SSD Keras EfficientNet-b1 BiFPN (EfficientDet-d1) Feature Extractor.
Args:
is_training: whether the network is in training mode.
depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
multiplier for the feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: whether to freeze batch norm parameters during training
or not. When training with a small batch size (e.g. 1), it is desirable
to freeze batch norm update and use pretrained batch norm params.
inplace_batchnorm_update: whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
bifpn_min_level: the highest resolution feature map to use in BiFPN. The
valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
respectively.
bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
BiFPN constructions uses features maps starting from bifpn_min_level
upto the bifpn_max_level. In the case that there are not enough feature
maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of BiFPN
levels.
bifpn_num_iterations: number of BiFPN iterations. Overrided if
efficientdet_version is provided.
bifpn_num_filters: number of filters (channels) in all BiFPN layers.
Overrided if efficientdet_version is provided.
bifpn_combine_method: the method used to combine BiFPN nodes.
use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
explicit padding when extracting features.
use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
convolutions when inputs to a node have a differing number of channels,
and use separable convolutions after combine operations.
override_base_feature_extractor_hyperparams: unsupported. Whether to
override hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: a string name scope to assign to the model. If 'None', Keras will
auto-generate one from the class name.
"""
super(SSDEfficientNetB1BiFPNKerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
bifpn_min_level=bifpn_min_level,
bifpn_max_level=bifpn_max_level,
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=bifpn_num_filters,
bifpn_combine_method=bifpn_combine_method,
efficientnet_version='efficientnet-b1',
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
class SSDEfficientNetB2BiFPNKerasFeatureExtractor(
SSDEfficientNetBiFPNKerasFeatureExtractor):
"""SSD Keras EfficientNet-b2 BiFPN (EfficientDet-d2) Feature Extractor."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
bifpn_min_level=3,
bifpn_max_level=7,
bifpn_num_iterations=5,
bifpn_num_filters=112,
bifpn_combine_method='fast_attention',
use_explicit_padding=None,
use_depthwise=None,
override_base_feature_extractor_hyperparams=None,
name='EfficientDet-D2'):
"""SSD Keras EfficientNet-b2 BiFPN (EfficientDet-d2) Feature Extractor.
Args:
is_training: whether the network is in training mode.
depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
multiplier for the feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: whether to freeze batch norm parameters during training
or not. When training with a small batch size (e.g. 1), it is desirable
to freeze batch norm update and use pretrained batch norm params.
inplace_batchnorm_update: whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
bifpn_min_level: the highest resolution feature map to use in BiFPN. The
valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
respectively.
bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
BiFPN constructions uses features maps starting from bifpn_min_level
upto the bifpn_max_level. In the case that there are not enough feature
maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of BiFPN
levels.
bifpn_num_iterations: number of BiFPN iterations. Overrided if
efficientdet_version is provided.
bifpn_num_filters: number of filters (channels) in all BiFPN layers.
Overrided if efficientdet_version is provided.
bifpn_combine_method: the method used to combine BiFPN nodes.
use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
explicit padding when extracting features.
use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
convolutions when inputs to a node have a differing number of channels,
and use separable convolutions after combine operations.
override_base_feature_extractor_hyperparams: unsupported. Whether to
override hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: a string name scope to assign to the model. If 'None', Keras will
auto-generate one from the class name.
"""
super(SSDEfficientNetB2BiFPNKerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
bifpn_min_level=bifpn_min_level,
bifpn_max_level=bifpn_max_level,
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=bifpn_num_filters,
bifpn_combine_method=bifpn_combine_method,
efficientnet_version='efficientnet-b2',
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
class SSDEfficientNetB3BiFPNKerasFeatureExtractor(
SSDEfficientNetBiFPNKerasFeatureExtractor):
"""SSD Keras EfficientNet-b3 BiFPN (EfficientDet-d3) Feature Extractor."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
bifpn_min_level=3,
bifpn_max_level=7,
bifpn_num_iterations=6,
bifpn_num_filters=160,
bifpn_combine_method='fast_attention',
use_explicit_padding=None,
use_depthwise=None,
override_base_feature_extractor_hyperparams=None,
name='EfficientDet-D3'):
"""SSD Keras EfficientNet-b3 BiFPN (EfficientDet-d3) Feature Extractor.
Args:
is_training: whether the network is in training mode.
depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
multiplier for the feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: whether to freeze batch norm parameters during training
or not. When training with a small batch size (e.g. 1), it is desirable
to freeze batch norm update and use pretrained batch norm params.
inplace_batchnorm_update: whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
bifpn_min_level: the highest resolution feature map to use in BiFPN. The
valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
respectively.
bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
BiFPN constructions uses features maps starting from bifpn_min_level
upto the bifpn_max_level. In the case that there are not enough feature
maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of BiFPN
levels.
bifpn_num_iterations: number of BiFPN iterations. Overrided if
efficientdet_version is provided.
bifpn_num_filters: number of filters (channels) in all BiFPN layers.
Overrided if efficientdet_version is provided.
bifpn_combine_method: the method used to combine BiFPN nodes.
use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
explicit padding when extracting features.
use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
convolutions when inputs to a node have a differing number of channels,
and use separable convolutions after combine operations.
override_base_feature_extractor_hyperparams: unsupported. Whether to
override hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: a string name scope to assign to the model. If 'None', Keras will
auto-generate one from the class name.
"""
super(SSDEfficientNetB3BiFPNKerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
bifpn_min_level=bifpn_min_level,
bifpn_max_level=bifpn_max_level,
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=bifpn_num_filters,
bifpn_combine_method=bifpn_combine_method,
efficientnet_version='efficientnet-b3',
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
class SSDEfficientNetB4BiFPNKerasFeatureExtractor(
SSDEfficientNetBiFPNKerasFeatureExtractor):
"""SSD Keras EfficientNet-b4 BiFPN (EfficientDet-d4) Feature Extractor."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
bifpn_min_level=3,
bifpn_max_level=7,
bifpn_num_iterations=7,
bifpn_num_filters=224,
bifpn_combine_method='fast_attention',
use_explicit_padding=None,
use_depthwise=None,
override_base_feature_extractor_hyperparams=None,
name='EfficientDet-D4'):
"""SSD Keras EfficientNet-b4 BiFPN (EfficientDet-d4) Feature Extractor.
Args:
is_training: whether the network is in training mode.
depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
multiplier for the feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: whether to freeze batch norm parameters during training
or not. When training with a small batch size (e.g. 1), it is desirable
to freeze batch norm update and use pretrained batch norm params.
inplace_batchnorm_update: whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
bifpn_min_level: the highest resolution feature map to use in BiFPN. The
valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
respectively.
bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
BiFPN constructions uses features maps starting from bifpn_min_level
upto the bifpn_max_level. In the case that there are not enough feature
maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of BiFPN
levels.
bifpn_num_iterations: number of BiFPN iterations. Overrided if
efficientdet_version is provided.
bifpn_num_filters: number of filters (channels) in all BiFPN layers.
Overrided if efficientdet_version is provided.
bifpn_combine_method: the method used to combine BiFPN nodes.
use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
explicit padding when extracting features.
use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
convolutions when inputs to a node have a differing number of channels,
and use separable convolutions after combine operations.
override_base_feature_extractor_hyperparams: unsupported. Whether to
override hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: a string name scope to assign to the model. If 'None', Keras will
auto-generate one from the class name.
"""
super(SSDEfficientNetB4BiFPNKerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
bifpn_min_level=bifpn_min_level,
bifpn_max_level=bifpn_max_level,
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=bifpn_num_filters,
bifpn_combine_method=bifpn_combine_method,
efficientnet_version='efficientnet-b4',
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
class SSDEfficientNetB5BiFPNKerasFeatureExtractor(
SSDEfficientNetBiFPNKerasFeatureExtractor):
"""SSD Keras EfficientNet-b5 BiFPN (EfficientDet-d5) Feature Extractor."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
bifpn_min_level=3,
bifpn_max_level=7,
bifpn_num_iterations=7,
bifpn_num_filters=288,
bifpn_combine_method='fast_attention',
use_explicit_padding=None,
use_depthwise=None,
override_base_feature_extractor_hyperparams=None,
name='EfficientDet-D5'):
"""SSD Keras EfficientNet-b5 BiFPN (EfficientDet-d5) Feature Extractor.
Args:
is_training: whether the network is in training mode.
depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
multiplier for the feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: whether to freeze batch norm parameters during training
or not. When training with a small batch size (e.g. 1), it is desirable
to freeze batch norm update and use pretrained batch norm params.
inplace_batchnorm_update: whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
bifpn_min_level: the highest resolution feature map to use in BiFPN. The
valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
respectively.
bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
BiFPN constructions uses features maps starting from bifpn_min_level
upto the bifpn_max_level. In the case that there are not enough feature
maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of BiFPN
levels.
bifpn_num_iterations: number of BiFPN iterations. Overrided if
efficientdet_version is provided.
bifpn_num_filters: number of filters (channels) in all BiFPN layers.
Overrided if efficientdet_version is provided.
bifpn_combine_method: the method used to combine BiFPN nodes.
use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
explicit padding when extracting features.
use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
convolutions when inputs to a node have a differing number of channels,
and use separable convolutions after combine operations.
override_base_feature_extractor_hyperparams: unsupported. Whether to
override hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: a string name scope to assign to the model. If 'None', Keras will
auto-generate one from the class name.
"""
super(SSDEfficientNetB5BiFPNKerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
bifpn_min_level=bifpn_min_level,
bifpn_max_level=bifpn_max_level,
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=bifpn_num_filters,
bifpn_combine_method=bifpn_combine_method,
efficientnet_version='efficientnet-b5',
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
class SSDEfficientNetB6BiFPNKerasFeatureExtractor(
SSDEfficientNetBiFPNKerasFeatureExtractor):
"""SSD Keras EfficientNet-b6 BiFPN (EfficientDet-d[6,7]) Feature Extractor."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
bifpn_min_level=3,
bifpn_max_level=7,
bifpn_num_iterations=8,
bifpn_num_filters=384,
bifpn_combine_method='sum',
use_explicit_padding=None,
use_depthwise=None,
override_base_feature_extractor_hyperparams=None,
name='EfficientDet-D6-D7'):
"""SSD Keras EfficientNet-b6 BiFPN (EfficientDet-d[6,7]) Feature Extractor.
SSD Keras EfficientNet-b6 BiFPN Feature Extractor, a.k.a. EfficientDet-d6
and EfficientDet-d7. The EfficientDet-d[6,7] models use the same backbone
EfficientNet-b6 and the same BiFPN architecture, and therefore have the same
number of parameters. They only differ in their input resolutions.
Args:
is_training: whether the network is in training mode.
depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
multiplier for the feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: whether to freeze batch norm parameters during training
or not. When training with a small batch size (e.g. 1), it is desirable
to freeze batch norm update and use pretrained batch norm params.
inplace_batchnorm_update: whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
bifpn_min_level: the highest resolution feature map to use in BiFPN. The
valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
respectively.
bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
BiFPN constructions uses features maps starting from bifpn_min_level
upto the bifpn_max_level. In the case that there are not enough feature
maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of BiFPN
levels.
bifpn_num_iterations: number of BiFPN iterations. Overrided if
efficientdet_version is provided.
bifpn_num_filters: number of filters (channels) in all BiFPN layers.
Overrided if efficientdet_version is provided.
bifpn_combine_method: the method used to combine BiFPN nodes.
use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
explicit padding when extracting features.
use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
convolutions when inputs to a node have a differing number of channels,
and use separable convolutions after combine operations.
override_base_feature_extractor_hyperparams: unsupported. Whether to
override hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: a string name scope to assign to the model. If 'None', Keras will
auto-generate one from the class name.
"""
super(SSDEfficientNetB6BiFPNKerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
bifpn_min_level=bifpn_min_level,
bifpn_max_level=bifpn_max_level,
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=bifpn_num_filters,
bifpn_combine_method=bifpn_combine_method,
efficientnet_version='efficientnet-b6',
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
class SSDEfficientNetB7BiFPNKerasFeatureExtractor(
SSDEfficientNetBiFPNKerasFeatureExtractor):
"""SSD Keras EfficientNet-b7 BiFPN Feature Extractor."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
bifpn_min_level=3,
bifpn_max_level=7,
bifpn_num_iterations=8,
bifpn_num_filters=384,
bifpn_combine_method='sum',
use_explicit_padding=None,
use_depthwise=None,
override_base_feature_extractor_hyperparams=None,
name='EfficientNet-B7_BiFPN'):
"""SSD Keras EfficientNet-b7 BiFPN Feature Extractor.
Args:
is_training: whether the network is in training mode.
depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
multiplier for the feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: whether to freeze batch norm parameters during training
or not. When training with a small batch size (e.g. 1), it is desirable
to freeze batch norm update and use pretrained batch norm params.
inplace_batchnorm_update: whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
bifpn_min_level: the highest resolution feature map to use in BiFPN. The
valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
respectively.
bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
BiFPN constructions uses features maps starting from bifpn_min_level
upto the bifpn_max_level. In the case that there are not enough feature
maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of BiFPN
levels.
bifpn_num_iterations: number of BiFPN iterations. Overrided if
efficientdet_version is provided.
bifpn_num_filters: number of filters (channels) in all BiFPN layers.
Overrided if efficientdet_version is provided.
bifpn_combine_method: the method used to combine BiFPN nodes.
use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
explicit padding when extracting features.
use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
convolutions when inputs to a node have a differing number of channels,
and use separable convolutions after combine operations.
override_base_feature_extractor_hyperparams: unsupported. Whether to
override hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: a string name scope to assign to the model. If 'None', Keras will
auto-generate one from the class name.
"""
super(SSDEfficientNetB7BiFPNKerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
bifpn_min_level=bifpn_min_level,
bifpn_max_level=bifpn_max_level,
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=bifpn_num_filters,
bifpn_combine_method=bifpn_combine_method,
efficientnet_version='efficientnet-b7',
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the ssd_efficientnet_bifpn_feature_extractor."""
import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v2 as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import ssd_efficientnet_bifpn_feature_extractor
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
from object_detection.utils import tf_version
def _count_params(model, trainable_only=True):
"""Returns the count of all model parameters, or just trainable ones."""
if not trainable_only:
return model.count_params()
else:
return int(np.sum([
tf.keras.backend.count_params(p) for p in model.trainable_weights]))
@parameterized.parameters(
{'efficientdet_version': 'efficientdet-d0',
'efficientnet_version': 'efficientnet-b0',
'bifpn_num_iterations': 3,
'bifpn_num_filters': 64,
'bifpn_combine_method': 'fast_attention'},
{'efficientdet_version': 'efficientdet-d1',
'efficientnet_version': 'efficientnet-b1',
'bifpn_num_iterations': 4,
'bifpn_num_filters': 88,
'bifpn_combine_method': 'fast_attention'},
{'efficientdet_version': 'efficientdet-d2',
'efficientnet_version': 'efficientnet-b2',
'bifpn_num_iterations': 5,
'bifpn_num_filters': 112,
'bifpn_combine_method': 'fast_attention'},
{'efficientdet_version': 'efficientdet-d3',
'efficientnet_version': 'efficientnet-b3',
'bifpn_num_iterations': 6,
'bifpn_num_filters': 160,
'bifpn_combine_method': 'fast_attention'},
{'efficientdet_version': 'efficientdet-d4',
'efficientnet_version': 'efficientnet-b4',
'bifpn_num_iterations': 7,
'bifpn_num_filters': 224,
'bifpn_combine_method': 'fast_attention'},
{'efficientdet_version': 'efficientdet-d5',
'efficientnet_version': 'efficientnet-b5',
'bifpn_num_iterations': 7,
'bifpn_num_filters': 288,
'bifpn_combine_method': 'fast_attention'},
# efficientdet-d6 and efficientdet-d7 only differ in input size.
{'efficientdet_version': 'efficientdet-d6-d7',
'efficientnet_version': 'efficientnet-b6',
'bifpn_num_iterations': 8,
'bifpn_num_filters': 384,
'bifpn_combine_method': 'sum'})
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class SSDEfficientNetBiFPNFeatureExtractorTest(
test_case.TestCase, parameterized.TestCase):
def _build_conv_hyperparams(self, add_batch_norm=True):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
"""
if add_batch_norm:
batch_norm_proto = """
batch_norm {
scale: true,
decay: 0.99,
epsilon: 0.001,
}
"""
conv_hyperparams_text_proto += batch_norm_proto
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _create_feature_extractor(self,
efficientnet_version='efficientnet-b0',
bifpn_num_iterations=3,
bifpn_num_filters=64,
bifpn_combine_method='fast_attention'):
"""Constructs a new EfficientNetBiFPN feature extractor."""
depth_multiplier = 1.0
pad_to_multiple = 1
min_depth = 16
return (ssd_efficientnet_bifpn_feature_extractor
.SSDEfficientNetBiFPNKerasFeatureExtractor(
is_training=True,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=self._build_conv_hyperparams(),
freeze_batchnorm=False,
inplace_batchnorm_update=False,
bifpn_min_level=3,
bifpn_max_level=7,
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=bifpn_num_filters,
bifpn_combine_method=bifpn_combine_method,
efficientnet_version=efficientnet_version))
def test_efficientdet_feature_extractor_shapes(self,
efficientdet_version,
efficientnet_version,
bifpn_num_iterations,
bifpn_num_filters,
bifpn_combine_method):
feature_extractor = self._create_feature_extractor(
efficientnet_version=efficientnet_version,
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=bifpn_num_filters,
bifpn_combine_method=bifpn_combine_method)
outputs = feature_extractor(np.zeros((2, 256, 256, 3), dtype=np.float32))
self.assertEqual(outputs[0].shape, (2, 32, 32, bifpn_num_filters))
self.assertEqual(outputs[1].shape, (2, 16, 16, bifpn_num_filters))
self.assertEqual(outputs[2].shape, (2, 8, 8, bifpn_num_filters))
self.assertEqual(outputs[3].shape, (2, 4, 4, bifpn_num_filters))
self.assertEqual(outputs[4].shape, (2, 2, 2, bifpn_num_filters))
def test_efficientdet_feature_extractor_params(self,
efficientdet_version,
efficientnet_version,
bifpn_num_iterations,
bifpn_num_filters,
bifpn_combine_method):
feature_extractor = self._create_feature_extractor(
efficientnet_version=efficientnet_version,
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=bifpn_num_filters,
bifpn_combine_method=bifpn_combine_method)
_ = feature_extractor(np.zeros((2, 256, 256, 3), dtype=np.float32))
expected_params = {
'efficientdet-d0': 5484829,
'efficientdet-d1': 8185156,
'efficientdet-d2': 9818153,
'efficientdet-d3': 13792706,
'efficientdet-d4': 22691445,
'efficientdet-d5': 35795677,
'efficientdet-d6-d7': 53624512,
}
num_params = _count_params(feature_extractor)
self.assertEqual(expected_params[efficientdet_version], num_params)
if __name__ == '__main__':
tf.test.main()
"""Setup script for object_detection with TF1.0."""
import os
from setuptools import find_packages
from setuptools import setup
REQUIRED_PACKAGES = ['apache-beam', 'pillow', 'lxml', 'matplotlib', 'Cython',
'contextlib2', 'tf-slim', 'six', 'pycocotools', 'scipy',
'pandas']
setup(
name='object_detection',
version='0.1',
install_requires=REQUIRED_PACKAGES,
include_package_data=True,
packages=(
[p for p in find_packages() if p.startswith('object_detection')] +
find_packages(where=os.path.join('.', 'slim'))),
package_dir={
'datasets': os.path.join('slim', 'datasets'),
'nets': os.path.join('slim', 'nets'),
'preprocessing': os.path.join('slim', 'preprocessing'),
'deployment': os.path.join('slim', 'deployment'),
'scripts': os.path.join('slim', 'scripts'),
},
description='Tensorflow Object Detection Library with TF1.0',
python_requires='>3.6',
)
"""Setup script for object_detection with TF2.0."""
import os
from setuptools import find_packages
from setuptools import setup
# Note: adding apache-beam to required packages causes conflict with
# tf-models-offical requirements. These packages request for incompatible
# oauth2client package.
REQUIRED_PACKAGES = ['pillow', 'lxml', 'matplotlib', 'Cython', 'contextlib2',
'tf-slim', 'six', 'pycocotools', 'scipy', 'pandas',
'tf-models-official']
setup(
name='object_detection',
version='0.1',
install_requires=REQUIRED_PACKAGES,
include_package_data=True,
packages=(
[p for p in find_packages() if p.startswith('object_detection')] +
find_packages(where=os.path.join('.', 'slim'))),
package_dir={
'datasets': os.path.join('slim', 'datasets'),
'nets': os.path.join('slim', 'nets'),
'preprocessing': os.path.join('slim', 'preprocessing'),
'deployment': os.path.join('slim', 'deployment'),
'scripts': os.path.join('slim', 'scripts'),
},
description='Tensorflow Object Detection Library',
python_requires='>3.6',
)
......@@ -61,7 +61,7 @@ class Head(object):
pass
class KerasHead(tf.keras.Model):
class KerasHead(tf.keras.layers.Layer):
"""Keras head base class."""
def call(self, features):
......
......@@ -183,6 +183,41 @@ message CenterNet {
optional float heatmap_bias_init = 3 [default = -2.19];
}
optional MaskEstimation mask_estimation_task = 8;
// Parameters which are related to DensePose estimation task.
// http://densepose.org/
message DensePoseEstimation {
// Weight of the task loss. The total loss of the model will be their
// summation of task losses weighted by the weights.
optional float task_loss_weight = 1 [default = 1.0];
// Class ID (0-indexed) that corresponds to the object in the label map that
// contains DensePose data.
optional int32 class_id = 2;
// Loss configuration for DensePose heatmap and regression losses. Note
// that the localization loss is used for surface coordinate losses and
// classification loss is used for part classification losses.
optional Loss loss = 3;
// The number of body parts.
optional int32 num_parts = 4 [default = 24];
// Loss weights for the two DensePose heads.
optional float part_loss_weight = 5 [default = 1.0];
optional float coordinate_loss_weight = 6 [default = 1.0];
// Whether to upsample the prediction feature maps back to the original
// input dimension prior to applying loss. This has the benefit of
// maintaining finer groundtruth location information.
optional bool upsample_to_input_res = 7 [default = true];
// The initial bias value of the convlution kernel of the class heatmap
// prediction head. -2.19 corresponds to predicting foreground with
// a probability of 0.1.
optional float heatmap_bias_init = 8 [default = -2.19];
}
optional DensePoseEstimation densepose_estimation_task = 9;
}
message CenterNetFeatureExtractor {
......
......@@ -4,7 +4,7 @@ package object_detection.protos;
// Message for defining a preprocessing operation on input data.
// See: //third_party/tensorflow_models/object_detection/core/preprocessor.py
// Next ID: 38
// Next ID: 39
message PreprocessingStep {
oneof preprocessing_step {
NormalizeImage normalize_image = 1;
......@@ -44,6 +44,7 @@ message PreprocessingStep {
RandomDownscaleToTargetPixels random_downscale_to_target_pixels = 35;
RandomPatchGaussian random_patch_gaussian = 36;
RandomSquareCropByScale random_square_crop_by_scale = 37;
RandomScaleCropAndPadToSquare random_scale_crop_and_pad_to_square = 38;
}
}
......@@ -572,3 +573,20 @@ message RandomSquareCropByScale {
// [min_scale, max_scale]
optional int32 num_scales = 4 [default=8];
}
// Randomly scale, crop, and then pad an image to the desired square output
// dimensions. Specifically, this method first samples a random_scale factor
// from a uniform distribution between scale_min and scale_max, and then resizes
// the image such that it's maximum dimension is (output_size * random_scale).
// Secondly, a square output_size crop is extracted from the resized image, and
// finally the cropped region is padded to the desired square output_size.
// The augmentation is borrowed from [1]
// [1]: https://arxiv.org/abs/1911.09070
message RandomScaleCropAndPadToSquare {
// The (square) output image size
optional int32 output_size = 1 [default = 512];
// The minimum and maximum values from which to sample the random scale.
optional float scale_min = 2 [default=0.1];
optional float scale_max = 3 [default=2.0];
}
......@@ -145,7 +145,7 @@ message Ssd {
optional MaskHead mask_head_config = 25;
}
// Next id: 19.
// Next id: 20.
message SsdFeatureExtractor {
reserved 6;
......@@ -185,8 +185,13 @@ message SsdFeatureExtractor {
// feature maps added by SSD.
optional bool use_depthwise = 8 [default = false];
// Feature Pyramid Networks config.
optional FeaturePyramidNetworks fpn = 10;
oneof feature_pyramid_oneof {
// Feature Pyramid Networks config.
FeaturePyramidNetworks fpn = 10;
// Bidirectional Feature Pyramid Networks config.
BidirectionalFeaturePyramidNetworks bifpn = 19;
}
// If true, replace preprocess function of feature extractor with a
// placeholder. This should only be used if all the image preprocessing steps
......@@ -225,3 +230,23 @@ message FeaturePyramidNetworks {
}
// Configuration for Bidirectional Feature Pyramid Networks.
message BidirectionalFeaturePyramidNetworks {
// minimum level in the feature pyramid.
optional int32 min_level = 1 [default = 3];
// maximum level in the feature pyramid.
optional int32 max_level = 2 [default = 7];
// The number of repeated top-down bottom-up iterations for BiFPN-based
// feature extractors (bidirectional feature pyramid networks).
optional int32 num_iterations = 3;
// The number of filters (channels) to use in feature pyramid layers for
// BiFPN-based feature extractors (bidirectional feature pyramid networks).
optional int32 num_filters = 4;
// Method used to combine inputs to BiFPN nodes.
optional string combine_method = 5 [default = 'fast_attention'];
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment