Unverified Commit ed4e22b8 authored by pkulzc's avatar pkulzc Committed by GitHub
Browse files

Merge pull request #3973 from pkulzc/master

Object detection internal changes
parents cac90a0e 13b89b93
...@@ -21,6 +21,7 @@ import tensorflow as tf ...@@ -21,6 +21,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils from object_detection.utils import shape_utils
from nets import resnet_v1 from nets import resnet_v1
...@@ -36,15 +37,14 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -36,15 +37,14 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
resnet_base_fn, resnet_base_fn,
resnet_scope_name, resnet_scope_name,
fpn_scope_name, fpn_scope_name,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""SSD FPN feature extractor based on Resnet v1 architecture. """SSD FPN feature extractor based on Resnet v1 architecture.
Args: Args:
...@@ -54,32 +54,28 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -54,32 +54,28 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. UNUSED Currently. min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
resnet_base_fn: base resnet network to use. resnet_base_fn: base resnet network to use.
resnet_scope_name: scope name under which to construct resnet resnet_scope_name: scope name under which to construct resnet
fpn_scope_name: scope name under which to construct the feature pyramid fpn_scope_name: scope name under which to construct the feature pyramid
network. network.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently. features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently. use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
Raises: Raises:
ValueError: On supplying invalid arguments for unused arguments. ValueError: On supplying invalid arguments for unused arguments.
""" """
super(_SSDResnetV1FpnFeatureExtractor, self).__init__( super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding,
use_explicit_padding, inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
if self._depth_multiplier != 1.0: if self._depth_multiplier != 1.0:
raise ValueError('Only depth 1.0 is supported, found: {}'. raise ValueError('Only depth 1.0 is supported, found: {}'.
format(self._depth_multiplier)) format(self._depth_multiplier))
...@@ -116,7 +112,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -116,7 +112,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
filtered_image_features[feature_name] = feature filtered_image_features[feature_name] = feature
return filtered_image_features return filtered_image_features
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -139,11 +135,14 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -139,11 +135,14 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
with tf.variable_scope( with tf.variable_scope(
self._resnet_scope_name, reuse=self._reuse_weights) as scope: self._resnet_scope_name, reuse=self._reuse_weights) as scope:
with slim.arg_scope(resnet_v1.resnet_arg_scope()): with slim.arg_scope(resnet_v1.resnet_arg_scope()):
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams else
context_manager.IdentityContextManager()):
_, image_features = self._resnet_base_fn( _, image_features = self._resnet_base_fn(
inputs=ops.pad_to_multiple(preprocessed_inputs, inputs=ops.pad_to_multiple(preprocessed_inputs,
self._pad_to_multiple), self._pad_to_multiple),
num_classes=None, num_classes=None,
is_training=self._is_training and self._batch_norm_trainable, is_training=None,
global_pool=False, global_pool=False,
output_stride=None, output_stride=None,
store_non_strided_activations=True, store_non_strided_activations=True,
...@@ -151,7 +150,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -151,7 +150,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
image_features = self._filter_features(image_features) image_features = self._filter_features(image_features)
last_feature_map = image_features['block4'] last_feature_map = image_features['block4']
with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights): with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
for i in range(5, 7): for i in range(5, 7):
last_feature_map = slim.conv2d( last_feature_map = slim.conv2d(
last_feature_map, last_feature_map,
...@@ -178,40 +177,36 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): ...@@ -178,40 +177,36 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""Resnet50 v1 FPN Feature Extractor for SSD Models. """SSD Resnet50 V1 FPN feature extractor based on Resnet v1 architecture.
Args: Args:
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor. depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth. UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently. features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently. use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDResnet50V1FpnFeatureExtractor, self).__init__( super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn', conv_hyperparams_fn, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding, reuse_weights, use_explicit_padding,
inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...@@ -221,40 +216,36 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): ...@@ -221,40 +216,36 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""Resnet101 v1 FPN Feature Extractor for SSD Models. """SSD Resnet101 V1 FPN feature extractor based on Resnet v1 architecture.
Args: Args:
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor. depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth. UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently. features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently. use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDResnet101V1FpnFeatureExtractor, self).__init__( super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn', conv_hyperparams_fn, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding, reuse_weights, use_explicit_padding,
inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...@@ -264,37 +255,33 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): ...@@ -264,37 +255,33 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""Resnet152 v1 FPN Feature Extractor for SSD Models. """SSD Resnet152 V1 FPN feature extractor based on Resnet v1 architecture.
Args: Args:
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor. depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth. UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently. features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently. use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDResnet152V1FpnFeatureExtractor, self).__init__( super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn', conv_hyperparams_fn, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding, reuse_weights, use_explicit_padding,
inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
...@@ -27,13 +27,10 @@ class SSDResnet50V1FeatureExtractorTest( ...@@ -27,13 +27,10 @@ class SSDResnet50V1FeatureExtractorTest(
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False): use_explicit_padding=False):
min_depth = 32 min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True is_training = True
return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor( return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, self.conv_hyperparams_fn, use_explicit_padding=use_explicit_padding)
use_explicit_padding=use_explicit_padding)
def _resnet_scope_name(self): def _resnet_scope_name(self):
return 'resnet_v1_50' return 'resnet_v1_50'
...@@ -47,13 +44,14 @@ class SSDResnet101V1FeatureExtractorTest( ...@@ -47,13 +44,14 @@ class SSDResnet101V1FeatureExtractorTest(
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False): use_explicit_padding=False):
min_depth = 32 min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True is_training = True
return ( return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor( ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training,
conv_hyperparams, batch_norm_trainable, depth_multiplier,
min_depth,
pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)) use_explicit_padding=use_explicit_padding))
def _resnet_scope_name(self): def _resnet_scope_name(self):
...@@ -68,13 +66,14 @@ class SSDResnet152V1FeatureExtractorTest( ...@@ -68,13 +66,14 @@ class SSDResnet152V1FeatureExtractorTest(
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False): use_explicit_padding=False):
min_depth = 32 min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True is_training = True
return ( return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor( ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training,
conv_hyperparams, batch_norm_trainable, depth_multiplier,
min_depth,
pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)) use_explicit_padding=use_explicit_padding))
def _resnet_scope_name(self): def _resnet_scope_name(self):
......
...@@ -118,6 +118,7 @@ message MaskRCNNBoxPredictor { ...@@ -118,6 +118,7 @@ message MaskRCNNBoxPredictor {
// The number of convolutions applied to image_features in the mask prediction // The number of convolutions applied to image_features in the mask prediction
// branch. // branch.
optional int32 mask_prediction_num_conv_layers = 11 [default = 2]; optional int32 mask_prediction_num_conv_layers = 11 [default = 2];
optional bool masks_are_class_agnostic = 12 [default = false];
} }
message RfcnBoxPredictor { message RfcnBoxPredictor {
......
...@@ -60,6 +60,21 @@ message Ssd { ...@@ -60,6 +60,21 @@ message Ssd {
// Loss configuration for training. // Loss configuration for training.
optional Loss loss = 11; optional Loss loss = 11;
// Whether to update batch norm parameters during training or not.
// When training with a relatively small batch size (e.g. 1), it is
// desirable to disable batch norm update and use pretrained batch norm
// params.
//
// Note: Some feature extractors are used with canned arg_scopes
// (e.g. resnet arg scopes). In these cases training behavior of batch norm
// variables may depend on both values of `freeze_batchnorm` and
// `is_training`.
//
// When canned arg_scopes are used with feature extractors `conv_hyperparams`
// will apply only to the additional layers that are added and are outside the
// canned arg_scope.
optional bool freeze_batchnorm = 16 [default = false];
// Whether to update batch_norm inplace during training. This is required // Whether to update batch_norm inplace during training. This is required
// for batch norm to work correctly on TPUs. When this is false, user must add // for batch norm to work correctly on TPUs. When this is false, user must add
// a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order // a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
...@@ -69,6 +84,8 @@ message Ssd { ...@@ -69,6 +84,8 @@ message Ssd {
message SsdFeatureExtractor { message SsdFeatureExtractor {
reserved 6;
// Type of ssd feature extractor. // Type of ssd feature extractor.
optional string type = 1; optional string type = 1;
...@@ -82,26 +99,19 @@ message SsdFeatureExtractor { ...@@ -82,26 +99,19 @@ message SsdFeatureExtractor {
// of the base feature extractor. // of the base feature extractor.
optional Hyperparams conv_hyperparams = 4; optional Hyperparams conv_hyperparams = 4;
// Normally, SSD feature extractors are constructed by reusing an existing
// base feature extractor (that has its own hyperparams) and adding new layers
// on top of it. `conv_hyperparams` above normally applies only to the new
// layers while base feature extractor uses its own default hyperparams. If
// this value is set to true, the base feature extractor's hyperparams will be
// overridden with the `conv_hyperparams`.
optional bool override_base_feature_extractor_hyperparams = 9 [default = false];
// The nearest multiple to zero-pad the input height and width dimensions to. // The nearest multiple to zero-pad the input height and width dimensions to.
// For example, if pad_to_multiple = 2, input dimensions are zero-padded // For example, if pad_to_multiple = 2, input dimensions are zero-padded
// until the resulting dimensions are even. // until the resulting dimensions are even.
optional int32 pad_to_multiple = 5 [default = 1]; optional int32 pad_to_multiple = 5 [default = 1];
// Whether to update batch norm parameters during training or not.
// When training with a relative small batch size (e.g. 1), it is
// desirable to disable batch norm update and use pretrained batch norm
// params.
//
// Note: Some feature extractors are used with canned arg_scopes
// (e.g resnet arg scopes). In these cases training behavior of batch norm
// variables may depend on both values of `batch_norm_trainable` and
// `is_training`.
//
// When canned arg_scopes are used with feature extractors `conv_hyperparams`
// will apply only to the additional layers that are added and are outside the
// canned arg_scope.
optional bool batch_norm_trainable = 6 [default=true];
// Whether to use explicit padding when extracting SSD multiresolution // Whether to use explicit padding when extracting SSD multiresolution
// features. Note that this does not apply to the base feature extractor. // features. Note that this does not apply to the base feature extractor.
optional bool use_explicit_padding = 7 [default=false]; optional bool use_explicit_padding = 7 [default=false];
......
...@@ -6,8 +6,11 @@ import "object_detection/protos/optimizer.proto"; ...@@ -6,8 +6,11 @@ import "object_detection/protos/optimizer.proto";
import "object_detection/protos/preprocessor.proto"; import "object_detection/protos/preprocessor.proto";
// Message for configuring DetectionModel training jobs (train.py). // Message for configuring DetectionModel training jobs (train.py).
// Next id: 25
message TrainConfig { message TrainConfig {
// Input queue batch size. // Effective batch size to use for training.
// For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be
// `batch_size` / number of cores (or `batch_size` / number of GPUs).
optional uint32 batch_size = 1 [default=32]; optional uint32 batch_size = 1 [default=32];
// Data augmentation options. // Data augmentation options.
...@@ -78,6 +81,10 @@ message TrainConfig { ...@@ -78,6 +81,10 @@ message TrainConfig {
// Note that only Sigmoid classification losses should be used. // Note that only Sigmoid classification losses should be used.
optional bool merge_multiple_label_boxes = 17 [default=false]; optional bool merge_multiple_label_boxes = 17 [default=false];
// If true, will use multiclass scores from object annotations as ground
// truth. Currently only compatible with annotated image inputs.
optional bool use_multiclass_scores = 24 [default = false];
// Whether to add regularization loss to `total_loss`. This is true by // Whether to add regularization loss to `total_loss`. This is true by
// default and adds all regularization losses defined in the model to // default and adds all regularization losses defined in the model to
// `total_loss`. // `total_loss`.
......
...@@ -98,6 +98,7 @@ model { ...@@ -98,6 +98,7 @@ model {
epsilon: 0.001, epsilon: 0.001,
} }
} }
override_base_feature_extractor_hyperparams: true
} }
loss { loss {
classification_loss { classification_loss {
......
...@@ -98,6 +98,7 @@ model { ...@@ -98,6 +98,7 @@ model {
epsilon: 0.001, epsilon: 0.001,
} }
} }
override_base_feature_extractor_hyperparams: true
} }
loss { loss {
classification_loss { classification_loss {
......
...@@ -98,6 +98,7 @@ model { ...@@ -98,6 +98,7 @@ model {
epsilon: 0.01, epsilon: 0.01,
} }
} }
override_base_feature_extractor_hyperparams: true
} }
loss { loss {
classification_loss { classification_loss {
......
...@@ -69,10 +69,13 @@ def create_input_queue(batch_size_per_clone, create_tensor_dict_fn, ...@@ -69,10 +69,13 @@ def create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
in tensor_dict) in tensor_dict)
include_keypoints = (fields.InputDataFields.groundtruth_keypoints include_keypoints = (fields.InputDataFields.groundtruth_keypoints
in tensor_dict) in tensor_dict)
include_multiclass_scores = (fields.InputDataFields.multiclass_scores
in tensor_dict)
if data_augmentation_options: if data_augmentation_options:
tensor_dict = preprocessor.preprocess( tensor_dict = preprocessor.preprocess(
tensor_dict, data_augmentation_options, tensor_dict, data_augmentation_options,
func_arg_map=preprocessor.get_default_func_arg_map( func_arg_map=preprocessor.get_default_func_arg_map(
include_multiclass_scores=include_multiclass_scores,
include_instance_masks=include_instance_masks, include_instance_masks=include_instance_masks,
include_keypoints=include_keypoints)) include_keypoints=include_keypoints))
...@@ -85,7 +88,10 @@ def create_input_queue(batch_size_per_clone, create_tensor_dict_fn, ...@@ -85,7 +88,10 @@ def create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
return input_queue return input_queue
def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False): def get_inputs(input_queue,
num_classes,
merge_multiple_label_boxes=False,
use_multiclass_scores=False):
"""Dequeues batch and constructs inputs to object detection model. """Dequeues batch and constructs inputs to object detection model.
Args: Args:
...@@ -95,6 +101,8 @@ def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False): ...@@ -95,6 +101,8 @@ def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False):
or not. Defaults to false. Merged boxes are represented with a single or not. Defaults to false. Merged boxes are represented with a single
box and a k-hot encoding of the multiple labels associated with the box and a k-hot encoding of the multiple labels associated with the
boxes. boxes.
use_multiclass_scores: Whether to use multiclass scores instead of
groundtruth_classes.
Returns: Returns:
images: a list of 3-D float tensor of images. images: a list of 3-D float tensor of images.
...@@ -123,9 +131,19 @@ def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False): ...@@ -123,9 +131,19 @@ def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False):
classes_gt = tf.cast(read_data[fields.InputDataFields.groundtruth_classes], classes_gt = tf.cast(read_data[fields.InputDataFields.groundtruth_classes],
tf.int32) tf.int32)
classes_gt -= label_id_offset classes_gt -= label_id_offset
if merge_multiple_label_boxes and use_multiclass_scores:
raise ValueError(
'Using both merge_multiple_label_boxes and use_multiclass_scores is'
'not supported'
)
if merge_multiple_label_boxes: if merge_multiple_label_boxes:
location_gt, classes_gt, _ = util_ops.merge_boxes_with_multiple_labels( location_gt, classes_gt, _ = util_ops.merge_boxes_with_multiple_labels(
location_gt, classes_gt, num_classes) location_gt, classes_gt, num_classes)
elif use_multiclass_scores:
classes_gt = tf.cast(read_data[fields.InputDataFields.multiclass_scores],
tf.float32)
else: else:
classes_gt = util_ops.padded_one_hot_encoding( classes_gt = util_ops.padded_one_hot_encoding(
indices=classes_gt, depth=num_classes, left_pad=0) indices=classes_gt, depth=num_classes, left_pad=0)
...@@ -155,7 +173,8 @@ def _create_losses(input_queue, create_model_fn, train_config): ...@@ -155,7 +173,8 @@ def _create_losses(input_queue, create_model_fn, train_config):
groundtruth_masks_list, groundtruth_keypoints_list, _) = get_inputs( groundtruth_masks_list, groundtruth_keypoints_list, _) = get_inputs(
input_queue, input_queue,
detection_model.num_classes, detection_model.num_classes,
train_config.merge_multiple_label_boxes) train_config.merge_multiple_label_boxes,
train_config.use_multiclass_scores)
preprocessed_images = [] preprocessed_images = []
true_image_shapes = [] true_image_shapes = []
...@@ -183,9 +202,19 @@ def _create_losses(input_queue, create_model_fn, train_config): ...@@ -183,9 +202,19 @@ def _create_losses(input_queue, create_model_fn, train_config):
tf.losses.add_loss(loss_tensor) tf.losses.add_loss(loss_tensor)
def train(create_tensor_dict_fn, create_model_fn, train_config, master, task, def train(create_tensor_dict_fn,
num_clones, worker_replicas, clone_on_cpu, ps_tasks, worker_job_name, create_model_fn,
is_chief, train_dir, graph_hook_fn=None): train_config,
master,
task,
num_clones,
worker_replicas,
clone_on_cpu,
ps_tasks,
worker_job_name,
is_chief,
train_dir,
graph_hook_fn=None):
"""Training function for detection models. """Training function for detection models.
Args: Args:
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment