Unverified Commit ed4e22b8 authored by pkulzc's avatar pkulzc Committed by GitHub
Browse files

Merge pull request #3973 from pkulzc/master

Object detection internal changes
parents cac90a0e 13b89b93
......@@ -21,6 +21,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import resnet_v1
......@@ -36,15 +37,14 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
conv_hyperparams_fn,
resnet_base_fn,
resnet_scope_name,
fpn_scope_name,
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
override_base_feature_extractor_hyperparams=False):
"""SSD FPN feature extractor based on Resnet v1 architecture.
Args:
......@@ -54,32 +54,28 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
resnet_base_fn: base resnet network to use.
resnet_scope_name: scope name under which to construct resnet
fpn_scope_name: scope name under which to construct the feature pyramid
network.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
Raises:
ValueError: On supplying invalid arguments for unused arguments.
"""
super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, inplace_batchnorm_update)
conv_hyperparams_fn, reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
if self._depth_multiplier != 1.0:
raise ValueError('Only depth 1.0 is supported, found: {}'.
format(self._depth_multiplier))
......@@ -116,7 +112,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
filtered_image_features[feature_name] = feature
return filtered_image_features
def _extract_features(self, preprocessed_inputs):
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......@@ -139,19 +135,22 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
with tf.variable_scope(
self._resnet_scope_name, reuse=self._reuse_weights) as scope:
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
_, image_features = self._resnet_base_fn(
inputs=ops.pad_to_multiple(preprocessed_inputs,
self._pad_to_multiple),
num_classes=None,
is_training=self._is_training and self._batch_norm_trainable,
global_pool=False,
output_stride=None,
store_non_strided_activations=True,
scope=scope)
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams else
context_manager.IdentityContextManager()):
_, image_features = self._resnet_base_fn(
inputs=ops.pad_to_multiple(preprocessed_inputs,
self._pad_to_multiple),
num_classes=None,
is_training=None,
global_pool=False,
output_stride=None,
store_non_strided_activations=True,
scope=scope)
image_features = self._filter_features(image_features)
last_feature_map = image_features['block4']
with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
for i in range(5, 7):
last_feature_map = slim.conv2d(
last_feature_map,
......@@ -178,40 +177,36 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
"""Resnet50 v1 FPN Feature Extractor for SSD Models.
override_base_feature_extractor_hyperparams=False):
"""SSD Resnet50 V1 FPN feature extractor based on Resnet v1 architecture.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding,
inplace_batchnorm_update)
conv_hyperparams_fn, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
......@@ -221,40 +216,36 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
"""Resnet101 v1 FPN Feature Extractor for SSD Models.
override_base_feature_extractor_hyperparams=False):
"""SSD Resnet101 V1 FPN feature extractor based on Resnet v1 architecture.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding,
inplace_batchnorm_update)
conv_hyperparams_fn, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
......@@ -264,37 +255,33 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
"""Resnet152 v1 FPN Feature Extractor for SSD Models.
override_base_feature_extractor_hyperparams=False):
"""SSD Resnet152 V1 FPN feature extractor based on Resnet v1 architecture.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding,
inplace_batchnorm_update)
conv_hyperparams_fn, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
......@@ -118,6 +118,7 @@ message MaskRCNNBoxPredictor {
// The number of convolutions applied to image_features in the mask prediction
// branch.
optional int32 mask_prediction_num_conv_layers = 11 [default = 2];
optional bool masks_are_class_agnostic = 12 [default = false];
}
message RfcnBoxPredictor {
......
This diff is collapsed.
......@@ -6,8 +6,11 @@ import "object_detection/protos/optimizer.proto";
import "object_detection/protos/preprocessor.proto";
// Message for configuring DetectionModel training jobs (train.py).
// Next id: 25
message TrainConfig {
// Input queue batch size.
// Effective batch size to use for training.
// For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be
// `batch_size` / number of cores (or `batch_size` / number of GPUs).
optional uint32 batch_size = 1 [default=32];
// Data augmentation options.
......@@ -78,6 +81,10 @@ message TrainConfig {
// Note that only Sigmoid classification losses should be used.
optional bool merge_multiple_label_boxes = 17 [default=false];
// If true, will use multiclass scores from object annotations as ground
// truth. Currently only compatible with annotated image inputs.
optional bool use_multiclass_scores = 24 [default = false];
// Whether to add regularization loss to `total_loss`. This is true by
// default and adds all regularization losses defined in the model to
// `total_loss`.
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment