Unverified commit ed4e22b8, authored by pkulzc and committed by GitHub

Merge pull request #3973 from pkulzc/master

Object detection internal changes
parents cac90a0e 13b89b93
@@ -21,6 +21,7 @@ import tensorflow as tf
 from object_detection.meta_architectures import ssd_meta_arch
 from object_detection.models import feature_map_generators
+from object_detection.utils import context_manager
 from object_detection.utils import ops
 from object_detection.utils import shape_utils
 from nets import resnet_v1
@@ -36,15 +37,14 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
                depth_multiplier,
                min_depth,
                pad_to_multiple,
-               conv_hyperparams,
+               conv_hyperparams_fn,
                resnet_base_fn,
                resnet_scope_name,
                fpn_scope_name,
-               batch_norm_trainable=True,
                reuse_weights=None,
                use_explicit_padding=False,
                use_depthwise=False,
-               inplace_batchnorm_update=False):
+               override_base_feature_extractor_hyperparams=False):
     """SSD FPN feature extractor based on Resnet v1 architecture.

     Args:
@@ -54,32 +54,28 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
       min_depth: minimum feature extractor depth. UNUSED Currently.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
       resnet_base_fn: base resnet network to use.
       resnet_scope_name: scope name under which to construct resnet
       fpn_scope_name: scope name under which to construct the feature pyramid
         network.
-      batch_norm_trainable: Whether to update batch norm parameters during
-        training or not. When training with a small batch size
-        (e.g. 1), it is desirable to disable batch norm update and use
-        pretrained batch norm params.
       reuse_weights: Whether to reuse variables. Default is None.
       use_explicit_padding: Whether to use explicit padding when extracting
         features. Default is False. UNUSED currently.
       use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
-      inplace_batchnorm_update: Whether to update batch_norm inplace during
-        training. This is required for batch norm to work correctly on TPUs.
-        When this is false, user must add a control dependency on
-        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
-        norm moving average parameters.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.

     Raises:
       ValueError: On supplying invalid arguments for unused arguments.
     """
     super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
         is_training, depth_multiplier, min_depth, pad_to_multiple,
-        conv_hyperparams, batch_norm_trainable, reuse_weights,
-        use_explicit_padding, inplace_batchnorm_update)
+        conv_hyperparams_fn, reuse_weights, use_explicit_padding,
+        override_base_feature_extractor_hyperparams)
     if self._depth_multiplier != 1.0:
       raise ValueError('Only depth 1.0 is supported, found: {}'.
                        format(self._depth_multiplier))
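For context on the renamed argument: `conv_hyperparams_fn` is a zero-argument callable returning a tf-slim arg_scope, which the Object Detection API normally generates from the `conv_hyperparams` proto via its hyperparams builder. A minimal hand-rolled sketch (the regularizer, initializer, and activation values below are illustrative assumptions, not the API's defaults):

import tensorflow as tf

slim = tf.contrib.slim


def conv_hyperparams_fn():
  # Arg scope for the conv2d/separable_conv2d layers added on top of the
  # backbone. All hyperparameter values here are assumed for illustration.
  with slim.arg_scope(
      [slim.conv2d, slim.separable_conv2d],
      weights_regularizer=slim.l2_regularizer(scale=4e-5),
      weights_initializer=tf.truncated_normal_initializer(stddev=0.03),
      activation_fn=tf.nn.relu6,
      normalizer_fn=slim.batch_norm) as scope:
    return scope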
@@ -116,7 +112,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
         filtered_image_features[feature_name] = feature
     return filtered_image_features

-  def _extract_features(self, preprocessed_inputs):
+  def extract_features(self, preprocessed_inputs):
     """Extract features from preprocessed inputs.

     Args:
@@ -139,19 +135,22 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     with tf.variable_scope(
         self._resnet_scope_name, reuse=self._reuse_weights) as scope:
       with slim.arg_scope(resnet_v1.resnet_arg_scope()):
-        _, image_features = self._resnet_base_fn(
-            inputs=ops.pad_to_multiple(preprocessed_inputs,
-                                       self._pad_to_multiple),
-            num_classes=None,
-            is_training=self._is_training and self._batch_norm_trainable,
-            global_pool=False,
-            output_stride=None,
-            store_non_strided_activations=True,
-            scope=scope)
+        with (slim.arg_scope(self._conv_hyperparams_fn())
+              if self._override_base_feature_extractor_hyperparams else
+              context_manager.IdentityContextManager()):
+          _, image_features = self._resnet_base_fn(
+              inputs=ops.pad_to_multiple(preprocessed_inputs,
+                                         self._pad_to_multiple),
+              num_classes=None,
+              is_training=None,
+              global_pool=False,
+              output_stride=None,
+              store_non_strided_activations=True,
+              scope=scope)
       image_features = self._filter_features(image_features)
       last_feature_map = image_features['block4']
     with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
-      with slim.arg_scope(self._conv_hyperparams):
+      with slim.arg_scope(self._conv_hyperparams_fn()):
         for i in range(5, 7):
           last_feature_map = slim.conv2d(
               last_feature_map,
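The conditional `with` introduced above applies `conv_hyperparams_fn` to the base ResNet only when `override_base_feature_extractor_hyperparams` is set; otherwise it enters a no-op context. A no-op context manager in the spirit of `object_detection.utils.context_manager` can be as small as this sketch:

class IdentityContextManager(object):
  """A context manager with no side effects (sketch)."""

  def __enter__(self):
    return None

  def __exit__(self, exec_type, exec_value, traceback):
    # Returning False re-raises any exception from inside the block.
    return False

Note also that the base network is now called with `is_training=None`, which leaves batch norm behavior to the enclosing arg_scope instead of the removed `batch_norm_trainable` flag.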
@@ -178,40 +177,36 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
                depth_multiplier,
                min_depth,
                pad_to_multiple,
-               conv_hyperparams,
-               batch_norm_trainable=True,
+               conv_hyperparams_fn,
                reuse_weights=None,
                use_explicit_padding=False,
                use_depthwise=False,
-               inplace_batchnorm_update=False):
-    """Resnet50 v1 FPN Feature Extractor for SSD Models.
+               override_base_feature_extractor_hyperparams=False):
+    """SSD Resnet50 V1 FPN feature extractor based on Resnet v1 architecture.

     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-      min_depth: minimum feature extractor depth. UNUSED Currently.
+      min_depth: minimum feature extractor depth. UNUSED currently.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
-      batch_norm_trainable: Whether to update batch norm parameters during
-        training or not. When training with a small batch size
-        (e.g. 1), it is desirable to disable batch norm update and use
-        pretrained batch norm params.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
       reuse_weights: Whether to reuse variables. Default is None.
       use_explicit_padding: Whether to use explicit padding when extracting
         features. Default is False. UNUSED currently.
       use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
-      inplace_batchnorm_update: Whether to update batch_norm inplace during
-        training. This is required for batch norm to work correctly on TPUs.
-        When this is false, user must add a control dependency on
-        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
-        norm moving average parameters.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
     """
     super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
         is_training, depth_multiplier, min_depth, pad_to_multiple,
-        conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
-        batch_norm_trainable, reuse_weights, use_explicit_padding,
-        inplace_batchnorm_update)
+        conv_hyperparams_fn, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
+        reuse_weights, use_explicit_padding,
+        override_base_feature_extractor_hyperparams)
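Taken together, a usage sketch of the updated ResNet50 constructor (argument values are illustrative, and `conv_hyperparams_fn` is the kind of callable sketched earlier):

feature_extractor = SSDResnet50V1FpnFeatureExtractor(
    is_training=True,
    depth_multiplier=1.0,  # values other than 1.0 raise ValueError
    min_depth=16,          # currently unused
    pad_to_multiple=32,
    conv_hyperparams_fn=conv_hyperparams_fn,
    override_base_feature_extractor_hyperparams=False)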
 class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
@@ -221,40 +216,36 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
                depth_multiplier,
                min_depth,
                pad_to_multiple,
-               conv_hyperparams,
-               batch_norm_trainable=True,
+               conv_hyperparams_fn,
                reuse_weights=None,
                use_explicit_padding=False,
                use_depthwise=False,
-               inplace_batchnorm_update=False):
-    """Resnet101 v1 FPN Feature Extractor for SSD Models.
+               override_base_feature_extractor_hyperparams=False):
+    """SSD Resnet101 V1 FPN feature extractor based on Resnet v1 architecture.

     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-      min_depth: minimum feature extractor depth. UNUSED Currently.
+      min_depth: minimum feature extractor depth. UNUSED currently.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
-      batch_norm_trainable: Whether to update batch norm parameters during
-        training or not. When training with a small batch size
-        (e.g. 1), it is desirable to disable batch norm update and use
-        pretrained batch norm params.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
       reuse_weights: Whether to reuse variables. Default is None.
       use_explicit_padding: Whether to use explicit padding when extracting
         features. Default is False. UNUSED currently.
       use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
-      inplace_batchnorm_update: Whether to update batch_norm inplace during
-        training. This is required for batch norm to work correctly on TPUs.
-        When this is false, user must add a control dependency on
-        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
-        norm moving average parameters.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
     """
     super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
         is_training, depth_multiplier, min_depth, pad_to_multiple,
-        conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
-        batch_norm_trainable, reuse_weights, use_explicit_padding,
-        inplace_batchnorm_update)
+        conv_hyperparams_fn, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
+        reuse_weights, use_explicit_padding,
+        override_base_feature_extractor_hyperparams)
 class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
@@ -264,37 +255,33 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
                depth_multiplier,
                min_depth,
                pad_to_multiple,
-               conv_hyperparams,
-               batch_norm_trainable=True,
+               conv_hyperparams_fn,
                reuse_weights=None,
                use_explicit_padding=False,
                use_depthwise=False,
-               inplace_batchnorm_update=False):
-    """Resnet152 v1 FPN Feature Extractor for SSD Models.
+               override_base_feature_extractor_hyperparams=False):
+    """SSD Resnet152 V1 FPN feature extractor based on Resnet v1 architecture.

     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-      min_depth: minimum feature extractor depth. UNUSED Currently.
+      min_depth: minimum feature extractor depth. UNUSED currently.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
-      batch_norm_trainable: Whether to update batch norm parameters during
-        training or not. When training with a small batch size
-        (e.g. 1), it is desirable to disable batch norm update and use
-        pretrained batch norm params.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
       reuse_weights: Whether to reuse variables. Default is None.
       use_explicit_padding: Whether to use explicit padding when extracting
         features. Default is False. UNUSED currently.
       use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
-      inplace_batchnorm_update: Whether to update batch_norm inplace during
-        training. This is required for batch norm to work correctly on TPUs.
-        When this is false, user must add a control dependency on
-        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
-        norm moving average parameters.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
     """
     super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
         is_training, depth_multiplier, min_depth, pad_to_multiple,
-        conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
-        batch_norm_trainable, reuse_weights, use_explicit_padding,
-        inplace_batchnorm_update)
+        conv_hyperparams_fn, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
+        reuse_weights, use_explicit_padding,
+        override_base_feature_extractor_hyperparams)
@@ -118,6 +118,7 @@ message MaskRCNNBoxPredictor {
   // The number of convolutions applied to image_features in the mask prediction
   // branch.
   optional int32 mask_prediction_num_conv_layers = 11 [default = 2];
+  optional bool masks_are_class_agnostic = 12 [default = false];
 }
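To see the new field in action, the message can be built from text format with the generated bindings (a sketch; `predict_instance_masks` is the pre-existing flag that enables the mask branch):

from google.protobuf import text_format

from object_detection.protos import box_predictor_pb2

predictor_config = box_predictor_pb2.MaskRCNNBoxPredictor()
text_format.Merge(
    """
    predict_instance_masks: true
    # One shared mask per box instead of a separate mask per class.
    masks_are_class_agnostic: true
    """, predictor_config)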
 message RfcnBoxPredictor {
@@ -6,8 +6,11 @@ import "object_detection/protos/optimizer.proto";
 import "object_detection/protos/preprocessor.proto";

 // Message for configuring DetectionModel training jobs (train.py).
+// Next id: 25
 message TrainConfig {
-  // Input queue batch size.
+  // Effective batch size to use for training.
+  // For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be
+  // `batch_size` / number of cores (or `batch_size` / number of GPUs).
   optional uint32 batch_size = 1 [default=32];

   // Data augmentation options.
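A worked example of the batch size semantics described in the new comment (numbers are hypothetical): with `batch_size: 64` and 8 TPU cores in a sync job, each core processes 64 / 8 = 8 examples per step.

# Hypothetical numbers illustrating the `batch_size` comment above.
effective_batch_size = 64  # TrainConfig `batch_size`
num_cores = 8              # TPU cores (or GPUs) in the sync job
per_core_batch_size = effective_batch_size // num_cores  # == 8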
@@ -78,6 +81,10 @@ message TrainConfig {
   // Note that only Sigmoid classification losses should be used.
   optional bool merge_multiple_label_boxes = 17 [default=false];

+  // If true, will use multiclass scores from object annotations as ground
+  // truth. Currently only compatible with annotated image inputs.
+  optional bool use_multiclass_scores = 24 [default = false];
+
   // Whether to add regularization loss to `total_loss`. This is true by
   // default and adds all regularization losses defined in the model to
   // `total_loss`.
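As with the box predictor change, the new TrainConfig field can be set from text format (a sketch using the generated train_pb2 bindings):

from google.protobuf import text_format

from object_detection.protos import train_pb2

train_config = train_pb2.TrainConfig()
text_format.Merge(
    """
    batch_size: 64
    # Use per-class scores from the annotations as soft ground truth labels.
    use_multiclass_scores: true
    """, train_config)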