Commit 6b72b5cd authored by Zhichao Lu, committed by pkulzc
Browse files

Merged commit includes the following changes:

191649512  by Zhichao Lu:

    Introduce two parameters in ssd.proto - freeze_batchnorm, inplace_batchnorm_update - and set up slim arg_scopes in ssd_meta_arch.py so that they are applied to all batchnorm ops in the predict() method.

    This centralizes the control of freezing and doing inplace batchnorm updates.

--
191620303  by Zhichao Lu:

    Modifications to the preprocessor to support multiclass scores

--
191610773  by Zhichao Lu:

    Adding multiclass_scores to InputDataFields and adding padding for multiclass_scores.

--
191595011  by Zhichao Lu:

    Contains implementation of the detection metric for the Open Images Challenge.

--
191449408  by Zhichao Lu:

    Change hyperparams_builder to return a callable so that users can inherit values from outer arg_scopes. This allows us to easily set batch_norm parameters like "is_training" and "inplace_batchnorm_update" for all feature extractors from the base class and propagate them correctly to the nested scopes.

--
191437008  by Zhichao Lu:

    Contains implementation of the Recall@N and MedianRank@N metrics.

--
191385254  by Zhichao Lu:

    Add config rewrite flag to eval.py

--
191382500  by Zhichao Lu:

    Fix a bug in config_util.

--

PiperOrigin-RevId: 191649512
parent 143464d2
...@@ -26,6 +26,10 @@ from object_detection.utils import test_case ...@@ -26,6 +26,10 @@ from object_detection.utils import test_case
class SsdFeatureExtractorTestBase(test_case.TestCase): class SsdFeatureExtractorTestBase(test_case.TestCase):
def conv_hyperparams_fn(self):
with tf.contrib.slim.arg_scope([]) as sc:
return sc
@abstractmethod @abstractmethod
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False): use_explicit_padding=False):
......
...@@ -33,12 +33,10 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -33,12 +33,10 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False):
inplace_batchnorm_update=False):
"""InceptionV2 Feature Extractor for SSD Models. """InceptionV2 Feature Extractor for SSD Models.
Args: Args:
...@@ -47,25 +45,16 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -47,25 +45,16 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops.
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDInceptionV2FeatureExtractor, self).__init__( super(SSDInceptionV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -82,7 +71,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -82,7 +71,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -103,7 +92,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -103,7 +92,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
'use_depthwise': self._use_depthwise, 'use_depthwise': self._use_depthwise,
} }
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
with tf.variable_scope('InceptionV2', with tf.variable_scope('InceptionV2',
reuse=self._reuse_weights) as scope: reuse=self._reuse_weights) as scope:
_, image_features = inception_v2.inception_v2_base( _, image_features = inception_v2.inception_v2_base(
......
...@@ -25,7 +25,7 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -25,7 +25,7 @@ class SsdInceptionV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True):
"""Constructs a SsdInceptionV2FeatureExtractor. """Constructs a SsdInceptionV2FeatureExtractor.
Args: Args:
...@@ -33,16 +33,14 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -33,16 +33,14 @@ class SsdInceptionV2FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not
Returns: Returns:
an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor. an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor.
""" """
min_depth = 32 min_depth = 32
conv_hyperparams = {}
return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor( return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable) self.conv_hyperparams_fn)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
image_height = 128 image_height = 128
......
...@@ -33,12 +33,10 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -33,12 +33,10 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False):
inplace_batchnorm_update=False):
"""InceptionV3 Feature Extractor for SSD Models. """InceptionV3 Feature Extractor for SSD Models.
Args: Args:
...@@ -47,25 +45,16 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -47,25 +45,16 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops.
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDInceptionV3FeatureExtractor, self).__init__( super(SSDInceptionV3FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -82,7 +71,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -82,7 +71,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -103,7 +92,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -103,7 +92,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
'use_depthwise': self._use_depthwise, 'use_depthwise': self._use_depthwise,
} }
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope: with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope:
_, image_features = inception_v3.inception_v3_base( _, image_features = inception_v3.inception_v3_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
......
...@@ -25,7 +25,7 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -25,7 +25,7 @@ class SsdInceptionV3FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True):
"""Constructs a SsdInceptionV3FeatureExtractor. """Constructs a SsdInceptionV3FeatureExtractor.
Args: Args:
...@@ -33,16 +33,14 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -33,16 +33,14 @@ class SsdInceptionV3FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not
Returns: Returns:
an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor. an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor.
""" """
min_depth = 32 min_depth = 32
conv_hyperparams = {}
return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor( return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable) self.conv_hyperparams_fn)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
image_height = 128 image_height = 128
......
...@@ -34,12 +34,10 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -34,12 +34,10 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False):
inplace_batchnorm_update=False):
"""MobileNetV1 Feature Extractor for SSD Models. """MobileNetV1 Feature Extractor for SSD Models.
Args: Args:
...@@ -48,26 +46,17 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -48,26 +46,17 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops.
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding inputs so that the output dimensions are the same as if 'SAME' padding
were used. were used.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDMobileNetV1FeatureExtractor, self).__init__( super(SSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -84,7 +73,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -84,7 +73,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -109,8 +98,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -109,8 +98,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
with tf.variable_scope('MobilenetV1', with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope: reuse=self._reuse_weights) as scope:
with slim.arg_scope( with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope( mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
is_training=(self._batch_norm_trainable and self._is_training))):
# TODO(skligys): Enable fused batch norm once quantization supports it. # TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False): with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v1.mobilenet_v1_base( _, image_features = mobilenet_v1.mobilenet_v1_base(
...@@ -120,7 +108,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -120,7 +108,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding, use_explicit_padding=self._use_explicit_padding,
scope=scope) scope=scope)
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(skligys): Enable fused batch norm once quantization supports it. # TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False): with slim.arg_scope([slim.batch_norm], fused=False):
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_maps = feature_map_generators.multi_resolution_feature_maps(
......
...@@ -27,8 +27,7 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -27,8 +27,7 @@ class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True, is_training=True, use_explicit_padding=False):
use_explicit_padding=False):
"""Constructs a new feature extractor. """Constructs a new feature extractor.
Args: Args:
...@@ -36,8 +35,6 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -36,8 +35,6 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding inputs so that the output dimensions are the same as if 'SAME' padding
were used. were used.
...@@ -45,11 +42,9 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -45,11 +42,9 @@ class SsdMobilenetV1FeatureExtractorTest(
an ssd_meta_arch.SSDFeatureExtractor object. an ssd_meta_arch.SSDFeatureExtractor object.
""" """
min_depth = 32 min_depth = 32
with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc:
conv_hyperparams = sc
return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor( return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable=batch_norm_trainable, self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding) use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
......
...@@ -35,12 +35,10 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -35,12 +35,10 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False):
inplace_batchnorm_update=False):
"""MobileNetV2 Feature Extractor for SSD Models. """MobileNetV2 Feature Extractor for SSD Models.
Mobilenet v2 (experimental), designed by sandler@. More details can be found Mobilenet v2 (experimental), designed by sandler@. More details can be found
...@@ -52,25 +50,16 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -52,25 +50,16 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops.
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDMobileNetV2FeatureExtractor, self).__init__( super(SSDMobileNetV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -87,7 +76,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -87,7 +76,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -110,9 +99,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -110,9 +99,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope: with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
with slim.arg_scope( with slim.arg_scope(
mobilenet_v2.training_scope( mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
is_training=(self._is_training and self._batch_norm_trainable),
bn_decay=0.9997)), \
slim.arg_scope( slim.arg_scope(
[mobilenet.depth_multiplier], min_depth=self._min_depth): [mobilenet.depth_multiplier], min_depth=self._min_depth):
# TODO(b/68150321): Enable fused batch norm once quantization # TODO(b/68150321): Enable fused batch norm once quantization
...@@ -124,7 +111,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -124,7 +111,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding, use_explicit_padding=self._use_explicit_padding,
scope=scope) scope=scope)
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(b/68150321): Enable fused batch norm once quantization # TODO(b/68150321): Enable fused batch norm once quantization
# supports it. # supports it.
with slim.arg_scope([slim.batch_norm], fused=False): with slim.arg_scope([slim.batch_norm], fused=False):
......
...@@ -41,14 +41,12 @@ class SsdMobilenetV2FeatureExtractorTest( ...@@ -41,14 +41,12 @@ class SsdMobilenetV2FeatureExtractorTest(
an ssd_meta_arch.SSDFeatureExtractor object. an ssd_meta_arch.SSDFeatureExtractor object.
""" """
min_depth = 32 min_depth = 32
with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc:
conv_hyperparams = sc
return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor( return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor(
False, False,
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding) use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
......
...@@ -36,15 +36,13 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -36,15 +36,13 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
resnet_base_fn, resnet_base_fn,
resnet_scope_name, resnet_scope_name,
fpn_scope_name, fpn_scope_name,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False):
inplace_batchnorm_update=False):
"""SSD FPN feature extractor based on Resnet v1 architecture. """SSD FPN feature extractor based on Resnet v1 architecture.
Args: Args:
...@@ -54,32 +52,23 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -54,32 +52,23 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. UNUSED Currently. min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
resnet_base_fn: base resnet network to use. resnet_base_fn: base resnet network to use.
resnet_scope_name: scope name under which to construct resnet resnet_scope_name: scope name under which to construct resnet
fpn_scope_name: scope name under which to construct the feature pyramid fpn_scope_name: scope name under which to construct the feature pyramid
network. network.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently. features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently. use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
Raises: Raises:
ValueError: On supplying invalid arguments for unused arguments. ValueError: On supplying invalid arguments for unused arguments.
""" """
super(_SSDResnetV1FpnFeatureExtractor, self).__init__( super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding)
use_explicit_padding, inplace_batchnorm_update)
if self._depth_multiplier != 1.0: if self._depth_multiplier != 1.0:
raise ValueError('Only depth 1.0 is supported, found: {}'. raise ValueError('Only depth 1.0 is supported, found: {}'.
format(self._depth_multiplier)) format(self._depth_multiplier))
...@@ -116,7 +105,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -116,7 +105,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
filtered_image_features[feature_name] = feature filtered_image_features[feature_name] = feature
return filtered_image_features return filtered_image_features
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -143,7 +132,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -143,7 +132,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
inputs=ops.pad_to_multiple(preprocessed_inputs, inputs=ops.pad_to_multiple(preprocessed_inputs,
self._pad_to_multiple), self._pad_to_multiple),
num_classes=None, num_classes=None,
is_training=self._is_training and self._batch_norm_trainable, is_training=None,
global_pool=False, global_pool=False,
output_stride=None, output_stride=None,
store_non_strided_activations=True, store_non_strided_activations=True,
...@@ -151,7 +140,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -151,7 +140,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
image_features = self._filter_features(image_features) image_features = self._filter_features(image_features)
last_feature_map = image_features['block4'] last_feature_map = image_features['block4']
with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights): with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
for i in range(5, 7): for i in range(5, 7):
last_feature_map = slim.conv2d( last_feature_map = slim.conv2d(
last_feature_map, last_feature_map,
...@@ -179,11 +168,9 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): ...@@ -179,11 +168,9 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False):
inplace_batchnorm_update=False):
"""Resnet50 v1 FPN Feature Extractor for SSD Models. """Resnet50 v1 FPN Feature Extractor for SSD Models.
Args: Args:
...@@ -193,25 +180,15 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): ...@@ -193,25 +180,15 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently. features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently. use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDResnet50V1FpnFeatureExtractor, self).__init__( super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn', conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding, reuse_weights, use_explicit_padding)
inplace_batchnorm_update)
class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...@@ -222,11 +199,9 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): ...@@ -222,11 +199,9 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False):
inplace_batchnorm_update=False):
"""Resnet101 v1 FPN Feature Extractor for SSD Models. """Resnet101 v1 FPN Feature Extractor for SSD Models.
Args: Args:
...@@ -236,25 +211,15 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): ...@@ -236,25 +211,15 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently. features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently. use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDResnet101V1FpnFeatureExtractor, self).__init__( super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn', conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding, reuse_weights, use_explicit_padding)
inplace_batchnorm_update)
class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...@@ -265,11 +230,9 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): ...@@ -265,11 +230,9 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False):
inplace_batchnorm_update=False):
"""Resnet152 v1 FPN Feature Extractor for SSD Models. """Resnet152 v1 FPN Feature Extractor for SSD Models.
Args: Args:
...@@ -279,22 +242,12 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): ...@@ -279,22 +242,12 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently. features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently. use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDResnet152V1FpnFeatureExtractor, self).__init__( super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn', conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding, reuse_weights, use_explicit_padding)
inplace_batchnorm_update)
...@@ -27,13 +27,10 @@ class SSDResnet50V1FeatureExtractorTest( ...@@ -27,13 +27,10 @@ class SSDResnet50V1FeatureExtractorTest(
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False): use_explicit_padding=False):
min_depth = 32 min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True is_training = True
return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor( return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, self.conv_hyperparams_fn, use_explicit_padding=use_explicit_padding)
use_explicit_padding=use_explicit_padding)
def _resnet_scope_name(self): def _resnet_scope_name(self):
return 'resnet_v1_50' return 'resnet_v1_50'
...@@ -47,13 +44,14 @@ class SSDResnet101V1FeatureExtractorTest( ...@@ -47,13 +44,14 @@ class SSDResnet101V1FeatureExtractorTest(
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False): use_explicit_padding=False):
min_depth = 32 min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True is_training = True
return ( return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor( ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training,
conv_hyperparams, batch_norm_trainable, depth_multiplier,
min_depth,
pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)) use_explicit_padding=use_explicit_padding))
def _resnet_scope_name(self): def _resnet_scope_name(self):
...@@ -68,13 +66,14 @@ class SSDResnet152V1FeatureExtractorTest( ...@@ -68,13 +66,14 @@ class SSDResnet152V1FeatureExtractorTest(
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False): use_explicit_padding=False):
min_depth = 32 min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True is_training = True
return ( return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor( ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training,
conv_hyperparams, batch_norm_trainable, depth_multiplier,
min_depth,
pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)) use_explicit_padding=use_explicit_padding))
def _resnet_scope_name(self): def _resnet_scope_name(self):
......
...@@ -60,6 +60,21 @@ message Ssd { ...@@ -60,6 +60,21 @@ message Ssd {
// Loss configuration for training. // Loss configuration for training.
optional Loss loss = 11; optional Loss loss = 11;
// Whether to freeze batch norm parameters during training, i.e. keep them
// fixed instead of updating them. When training with a relatively small
// batch size (e.g. 1), it is desirable to disable batch norm updates and use
// pretrained batch norm params.
//
// Note: Some feature extractors are used with canned arg_scopes
// (e.g. resnet arg scopes). In these cases training behavior of batch norm
// variables may depend on both values of `freeze_batchnorm` and
// `is_training`.
//
// When canned arg_scopes are used with feature extractors `conv_hyperparams`
// will apply only to the additional layers that are added and are outside the
// canned arg_scope.
optional bool freeze_batchnorm = 16 [default = false];
// Whether to update batch_norm inplace during training. This is required // Whether to update batch_norm inplace during training. This is required
// for batch norm to work correctly on TPUs. When this is false, user must add // for batch norm to work correctly on TPUs. When this is false, user must add
// a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order // a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
...@@ -69,6 +84,8 @@ message Ssd { ...@@ -69,6 +84,8 @@ message Ssd {
message SsdFeatureExtractor { message SsdFeatureExtractor {
reserved 6;
// Type of ssd feature extractor. // Type of ssd feature extractor.
optional string type = 1; optional string type = 1;
...@@ -87,21 +104,6 @@ message SsdFeatureExtractor { ...@@ -87,21 +104,6 @@ message SsdFeatureExtractor {
// until the resulting dimensions are even. // until the resulting dimensions are even.
optional int32 pad_to_multiple = 5 [default = 1]; optional int32 pad_to_multiple = 5 [default = 1];
// Whether to update batch norm parameters during training or not.
// When training with a relative small batch size (e.g. 1), it is
// desirable to disable batch norm update and use pretrained batch norm
// params.
//
// Note: Some feature extractors are used with canned arg_scopes
// (e.g resnet arg scopes). In these cases training behavior of batch norm
// variables may depend on both values of `batch_norm_trainable` and
// `is_training`.
//
// When canned arg_scopes are used with feature extractors `conv_hyperparams`
// will apply only to the additional layers that are added and are outside the
// canned arg_scope.
optional bool batch_norm_trainable = 6 [default=true];
// Whether to use explicit padding when extracting SSD multiresolution // Whether to use explicit padding when extracting SSD multiresolution
// features. Note that this does not apply to the base feature extractor. // features. Note that this does not apply to the base feature extractor.
optional bool use_explicit_padding = 7 [default=false]; optional bool use_explicit_padding = 7 [default=false];
......
...@@ -63,8 +63,10 @@ def get_spatial_image_size(image_resizer_config): ...@@ -63,8 +63,10 @@ def get_spatial_image_size(image_resizer_config):
ValueError: If the model type is not recognized. ValueError: If the model type is not recognized.
""" """
if image_resizer_config.HasField("fixed_shape_resizer"): if image_resizer_config.HasField("fixed_shape_resizer"):
return [image_resizer_config.fixed_shape_resizer.height, return [
image_resizer_config.fixed_shape_resizer.width] image_resizer_config.fixed_shape_resizer.height,
image_resizer_config.fixed_shape_resizer.width
]
if image_resizer_config.HasField("keep_aspect_ratio_resizer"): if image_resizer_config.HasField("keep_aspect_ratio_resizer"):
if image_resizer_config.keep_aspect_ratio_resizer.pad_to_max_dimension: if image_resizer_config.keep_aspect_ratio_resizer.pad_to_max_dimension:
return [image_resizer_config.keep_aspect_ratio_resizer.max_dimension] * 2 return [image_resizer_config.keep_aspect_ratio_resizer.max_dimension] * 2
...@@ -74,7 +76,7 @@ def get_spatial_image_size(image_resizer_config): ...@@ -74,7 +76,7 @@ def get_spatial_image_size(image_resizer_config):
def get_configs_from_pipeline_file(pipeline_config_path): def get_configs_from_pipeline_file(pipeline_config_path):
"""Reads configuration from a pipeline_pb2.TrainEvalPipelineConfig. """Reads config from a file containing pipeline_pb2.TrainEvalPipelineConfig.
Args: Args:
pipeline_config_path: Path to pipeline_pb2.TrainEvalPipelineConfig text pipeline_config_path: Path to pipeline_pb2.TrainEvalPipelineConfig text
...@@ -89,23 +91,34 @@ def get_configs_from_pipeline_file(pipeline_config_path): ...@@ -89,23 +91,34 @@ def get_configs_from_pipeline_file(pipeline_config_path):
with tf.gfile.GFile(pipeline_config_path, "r") as f: with tf.gfile.GFile(pipeline_config_path, "r") as f:
proto_str = f.read() proto_str = f.read()
text_format.Merge(proto_str, pipeline_config) text_format.Merge(proto_str, pipeline_config)
return create_configs_from_pipeline_proto(pipeline_config)
def create_configs_from_pipeline_proto(pipeline_config):
"""Creates a configs dictionary from pipeline_pb2.TrainEvalPipelineConfig.
Args:
pipeline_config: pipeline_pb2.TrainEvalPipelineConfig proto object.
Returns:
Dictionary of configuration objects. Keys are `model`, `train_config`,
`train_input_config`, `eval_config`, `eval_input_config`. Values are the
corresponding config objects.
"""
configs = {} configs = {}
configs["model"] = pipeline_config.model configs["model"] = pipeline_config.model
configs["train_config"] = pipeline_config.train_config configs["train_config"] = pipeline_config.train_config
configs["train_input_config"] = pipeline_config.train_input_reader configs["train_input_config"] = pipeline_config.train_input_reader
configs["eval_config"] = pipeline_config.eval_config configs["eval_config"] = pipeline_config.eval_config
configs["eval_input_config"] = pipeline_config.eval_input_reader configs["eval_input_config"] = pipeline_config.eval_input_reader
return configs return configs
def create_pipeline_proto_from_configs(configs): def create_pipeline_proto_from_configs(configs):
"""Creates a pipeline_pb2.TrainEvalPipelineConfig from configs dictionary. """Creates a pipeline_pb2.TrainEvalPipelineConfig from configs dictionary.
This function nearly performs the inverse operation of This function performs the inverse operation of
get_configs_from_pipeline_file(). Instead of returning a file path, it returns create_configs_from_pipeline_proto().
a `TrainEvalPipelineConfig` object.
Args: Args:
configs: Dictionary of configs. See get_configs_from_pipeline_file(). configs: Dictionary of configs. See get_configs_from_pipeline_file().
...@@ -437,7 +450,7 @@ def _get_classification_loss(model_config): ...@@ -437,7 +450,7 @@ def _get_classification_loss(model_config):
if meta_architecture == "faster_rcnn": if meta_architecture == "faster_rcnn":
model = model_config.faster_rcnn model = model_config.faster_rcnn
classification_loss = model.second_stage_classification_loss classification_loss = model.second_stage_classification_loss
if meta_architecture == "ssd": elif meta_architecture == "ssd":
model = model_config.ssd model = model_config.ssd
classification_loss = model.loss.classification_loss classification_loss = model.loss.classification_loss
else: else:
......
...@@ -93,6 +93,26 @@ class ConfigUtilTest(tf.test.TestCase): ...@@ -93,6 +93,26 @@ class ConfigUtilTest(tf.test.TestCase):
self.assertProtoEquals(pipeline_config.eval_input_reader, self.assertProtoEquals(pipeline_config.eval_input_reader,
configs["eval_input_config"]) configs["eval_input_config"])
def test_create_configs_from_pipeline_proto(self):
  """Checks each pipeline proto section is exposed under its configs key."""
  proto = pipeline_pb2.TrainEvalPipelineConfig()
  proto.model.faster_rcnn.num_classes = 10
  proto.train_config.batch_size = 32
  proto.train_input_reader.label_map_path = "path/to/label_map"
  proto.eval_config.num_examples = 20
  proto.eval_input_reader.queue_capacity = 100

  result = config_util.create_configs_from_pipeline_proto(proto)

  # Pairs of (expected proto section, key it must be stored under).
  expectations = [
      (proto.model, "model"),
      (proto.train_config, "train_config"),
      (proto.train_input_reader, "train_input_config"),
      (proto.eval_config, "eval_config"),
      (proto.eval_input_reader, "eval_input_config"),
  ]
  for expected_section, key in expectations:
    self.assertProtoEquals(expected_section, result[key])
def test_create_pipeline_proto_from_configs(self): def test_create_pipeline_proto_from_configs(self):
"""Tests that proto can be reconstructed from configs dictionary.""" """Tests that proto can be reconstructed from configs dictionary."""
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
......
...@@ -34,7 +34,8 @@ def _validate_label_map(label_map): ...@@ -34,7 +34,8 @@ def _validate_label_map(label_map):
for item in label_map.item: for item in label_map.item:
if item.id < 0: if item.id < 0:
raise ValueError('Label map ids should be >= 0.') raise ValueError('Label map ids should be >= 0.')
if item.id == 0 and item.name != 'background': if (item.id == 0 and item.name != 'background' and
item.display_name != 'background'):
raise ValueError('Label map id 0 is reserved for the background label') raise ValueError('Label map id 0 is reserved for the background label')
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Functions for computing metrics like precision, recall, CorLoc and etc.""" """Functions for computing metrics like precision, recall, CorLoc and etc."""
from __future__ import division from __future__ import division
...@@ -24,7 +23,7 @@ def compute_precision_recall(scores, labels, num_gt): ...@@ -24,7 +23,7 @@ def compute_precision_recall(scores, labels, num_gt):
Args: Args:
scores: A float numpy array representing detection score scores: A float numpy array representing detection score
labels: A boolean numpy array representing true/false positive labels labels: A float numpy array representing weighted true/false positive labels
num_gt: Number of ground truth instances num_gt: Number of ground truth instances
Raises: Raises:
...@@ -37,12 +36,13 @@ def compute_precision_recall(scores, labels, num_gt): ...@@ -37,12 +36,13 @@ def compute_precision_recall(scores, labels, num_gt):
This value is None if no ground truth labels are present. This value is None if no ground truth labels are present.
""" """
if not isinstance( if not isinstance(labels, np.ndarray) or len(labels.shape) != 1:
labels, np.ndarray) or labels.dtype != np.bool or len(labels.shape) != 1: raise ValueError("labels must be single dimension numpy array")
raise ValueError("labels must be single dimension bool numpy array")
if labels.dtype != np.float and labels.dtype != np.bool:
raise ValueError("labels type must be either bool or float")
if not isinstance( if not isinstance(scores, np.ndarray) or len(scores.shape) != 1:
scores, np.ndarray) or len(scores.shape) != 1:
raise ValueError("scores must be single dimension numpy array") raise ValueError("scores must be single dimension numpy array")
if num_gt < np.sum(labels): if num_gt < np.sum(labels):
...@@ -56,9 +56,8 @@ def compute_precision_recall(scores, labels, num_gt): ...@@ -56,9 +56,8 @@ def compute_precision_recall(scores, labels, num_gt):
sorted_indices = np.argsort(scores) sorted_indices = np.argsort(scores)
sorted_indices = sorted_indices[::-1] sorted_indices = sorted_indices[::-1]
labels = labels.astype(int)
true_positive_labels = labels[sorted_indices] true_positive_labels = labels[sorted_indices]
false_positive_labels = 1 - true_positive_labels false_positive_labels = (true_positive_labels <= 0).astype(float)
cum_true_positives = np.cumsum(true_positive_labels) cum_true_positives = np.cumsum(true_positive_labels)
cum_false_positives = np.cumsum(false_positive_labels) cum_false_positives = np.cumsum(false_positive_labels)
precision = cum_true_positives.astype(float) / ( precision = cum_true_positives.astype(float) / (
...@@ -90,8 +89,8 @@ def compute_average_precision(precision, recall): ...@@ -90,8 +89,8 @@ def compute_average_precision(precision, recall):
raise ValueError("If precision is None, recall must also be None") raise ValueError("If precision is None, recall must also be None")
return np.NAN return np.NAN
if not isinstance(precision, np.ndarray) or not isinstance(recall, if not isinstance(precision, np.ndarray) or not isinstance(
np.ndarray): recall, np.ndarray):
raise ValueError("precision and recall must be numpy array") raise ValueError("precision and recall must be numpy array")
if precision.dtype != np.float or recall.dtype != np.float: if precision.dtype != np.float or recall.dtype != np.float:
raise ValueError("input must be float numpy array.") raise ValueError("input must be float numpy array.")
...@@ -139,6 +138,53 @@ def compute_cor_loc(num_gt_imgs_per_class, ...@@ -139,6 +138,53 @@ def compute_cor_loc(num_gt_imgs_per_class,
class class
""" """
return np.where( return np.where(
num_gt_imgs_per_class == 0, num_gt_imgs_per_class == 0, np.nan,
np.nan,
num_images_correctly_detected_per_class / num_gt_imgs_per_class) num_images_correctly_detected_per_class / num_gt_imgs_per_class)
def compute_median_rank_at_k(tp_fp_list, k):
  """Computes MedianRank@k over the k top-scoring detections of each image.

  Args:
    tp_fp_list: a list of numpy arrays; each numpy array corresponds to all
        detections on a single image, where the detections are sorted by
        score in descending order. Each array element can have a boolean or
        float value. True positive elements have either value >0.0 or True;
        any other value is considered false positive.
    k: number of top-scoring proposals to take per image.

  Returns:
    median_rank: median of the (0-based, per-image) ranks of all true
        positive proposals among the top k by score. NaN when `tp_fp_list`
        is empty or no true positive falls within the top k.
  """
  # Slicing with [:k] already clips to the array length, so no explicit
  # min(k, len) is needed.
  ranks = [np.where(tp_fp[:k] > 0)[0] for tp_fp in tp_fp_list]
  if not ranks:
    # np.concatenate raises ValueError on an empty sequence.
    return np.nan
  concatenated_ranks = np.concatenate(ranks)
  if concatenated_ranks.size == 0:
    # np.median would also yield NaN here, but with a RuntimeWarning.
    return np.nan
  return np.median(concatenated_ranks)
def compute_recall_at_k(tp_fp_list, num_gt, k):
  """Computes Recall@k over the k top-scoring detections of each image.

  Args:
    tp_fp_list: a list of numpy arrays; each numpy array corresponds to all
        detections on a single image, where the detections are sorted by
        score in descending order. Each array element can have a boolean or
        float value. True positive elements have either value >0.0 or True;
        any other value is considered false positive.
    num_gt: number of groundtruth annotations.
    k: number of top-scoring proposals to take per image.

  Returns:
    recall: recall evaluated on the top k by score detections, i.e. the sum
        of the retained true positive values divided by `num_gt`. 0.0 when
        `tp_fp_list` is empty (no detections means no true positives).
  """
  # Slicing with [:k] already clips to the array length, so no explicit
  # min(k, len) is needed.
  top_k = [tp_fp[:k] for tp_fp in tp_fp_list]
  if not top_k:
    # np.concatenate raises ValueError on an empty sequence.
    return 0.0
  return np.sum(np.concatenate(top_k)) / num_gt
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Tests for object_detection.metrics.""" """Tests for object_detection.metrics."""
import numpy as np import numpy as np
...@@ -25,8 +24,8 @@ class MetricsTest(tf.test.TestCase): ...@@ -25,8 +24,8 @@ class MetricsTest(tf.test.TestCase):
def test_compute_cor_loc(self): def test_compute_cor_loc(self):
num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int) num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int)
num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0], num_images_correctly_detected_per_class = np.array(
dtype=int) [10, 0, 1, 0, 0], dtype=int)
corloc = metrics.compute_cor_loc(num_gt_imgs_per_class, corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
num_images_correctly_detected_per_class) num_images_correctly_detected_per_class)
expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float) expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float)
...@@ -34,8 +33,8 @@ class MetricsTest(tf.test.TestCase): ...@@ -34,8 +33,8 @@ class MetricsTest(tf.test.TestCase):
def test_compute_cor_loc_nans(self): def test_compute_cor_loc_nans(self):
num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int) num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int)
num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0], num_images_correctly_detected_per_class = np.array(
dtype=int) [10, 0, 1, 0, 0], dtype=int)
corloc = metrics.compute_cor_loc(num_gt_imgs_per_class, corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
num_images_correctly_detected_per_class) num_images_correctly_detected_per_class)
expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float) expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float)
...@@ -45,18 +44,37 @@ class MetricsTest(tf.test.TestCase): ...@@ -45,18 +44,37 @@ class MetricsTest(tf.test.TestCase):
num_gt = 10 num_gt = 10
scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float) scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool) labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
labels_float_type = np.array([0, 1, 1, 0, 0, 1], dtype=float)
accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float) accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float)
expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6]) expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6])
expected_recall = accumulated_tp_count / num_gt expected_recall = accumulated_tp_count / num_gt
precision, recall = metrics.compute_precision_recall(scores, labels, num_gt) precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
precision_float_type, recall_float_type = metrics.compute_precision_recall(
scores, labels_float_type, num_gt)
self.assertAllClose(precision, expected_precision)
self.assertAllClose(recall, expected_recall)
self.assertAllClose(precision_float_type, expected_precision)
self.assertAllClose(recall_float_type, expected_recall)
def test_compute_precision_recall_float(self):
num_gt = 10
scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
labels_float = np.array([0, 1, 1, 0.5, 0, 1], dtype=float)
expected_precision = np.array(
[0., 0.5, 0.33333333, 0.5, 0.55555556, 0.63636364], dtype=float)
expected_recall = np.array([0., 0.1, 0.1, 0.2, 0.25, 0.35], dtype=float)
precision, recall = metrics.compute_precision_recall(
scores, labels_float, num_gt)
self.assertAllClose(precision, expected_precision) self.assertAllClose(precision, expected_precision)
self.assertAllClose(recall, expected_recall) self.assertAllClose(recall, expected_recall)
def test_compute_average_precision(self): def test_compute_average_precision(self):
precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0], dtype=float) precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0], dtype=float)
recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5], dtype=float) recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5], dtype=float)
processed_precision = np.array([0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0], processed_precision = np.array(
dtype=float) [0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0], dtype=float)
recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0], dtype=float) recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0], dtype=float)
expected_mean_ap = np.sum(recall_interval * processed_precision) expected_mean_ap = np.sum(recall_interval * processed_precision)
mean_ap = metrics.compute_average_precision(precision, recall) mean_ap = metrics.compute_average_precision(precision, recall)
...@@ -74,6 +92,52 @@ class MetricsTest(tf.test.TestCase): ...@@ -74,6 +92,52 @@ class MetricsTest(tf.test.TestCase):
ap = metrics.compute_average_precision(precision, recall) ap = metrics.compute_average_precision(precision, recall)
self.assertTrue(np.isnan(ap)) self.assertTrue(np.isnan(ap))
def test_compute_recall_at_k(self):
  """Checks compute_recall_at_k on float and on boolean tp/fp label lists.

  The same three label arrays are supplied once as floats and once as
  booleans; both encodings are expected to give the same recall.
  """
  num_gt = 4
  # Positive entries sit at index 0 of the first array and index 1 of the
  # second array; the third array has no positive entry.
  tp_fp = [
      np.array([1, 0, 0], dtype=float),
      np.array([0, 1], dtype=float),
      np.array([0, 0, 0, 0, 0], dtype=float)
  ]
  # Every array is genuinely boolean so that this fixture exercises the
  # boolean input path rather than silently repeating the float case.
  tp_fp_bool = [
      np.array([True, False, False], dtype=bool),
      np.array([False, True], dtype=bool),
      np.array([False, False, False, False, False], dtype=bool)
  ]

  recall_1 = metrics.compute_recall_at_k(tp_fp, num_gt, 1)
  recall_3 = metrics.compute_recall_at_k(tp_fp, num_gt, 3)
  recall_5 = metrics.compute_recall_at_k(tp_fp, num_gt, 5)
  recall_3_bool = metrics.compute_recall_at_k(tp_fp_bool, num_gt, 3)

  self.assertAlmostEqual(recall_1, 0.25)
  self.assertAlmostEqual(recall_3, 0.5)
  self.assertAlmostEqual(recall_3_bool, 0.5)
  self.assertAlmostEqual(recall_5, 0.5)
def test_compute_median_rank_at_k(self):
  """Checks compute_median_rank_at_k on float and on boolean label lists.

  The float fixture uses a fractional label (0.1) in the second array; the
  boolean fixture marks the same positions as True.
  """
  tp_fp = [
      np.array([1, 0, 0], dtype=float),
      np.array([0, 0.1], dtype=float),
      np.array([0, 0, 0, 0, 0], dtype=float)
  ]
  # Every array is genuinely boolean so that this fixture exercises the
  # boolean input path rather than a mix of bool and float arrays.
  tp_fp_bool = [
      np.array([True, False, False], dtype=bool),
      np.array([False, True], dtype=bool),
      np.array([False, False, False, False, False], dtype=bool)
  ]

  median_ranks_1 = metrics.compute_median_rank_at_k(tp_fp, 1)
  median_ranks_3 = metrics.compute_median_rank_at_k(tp_fp, 3)
  median_ranks_5 = metrics.compute_median_rank_at_k(tp_fp, 5)
  median_ranks_3_bool = metrics.compute_median_rank_at_k(tp_fp_bool, 3)

  # assertEqual replaces the deprecated assertEquals alias, which newer
  # Python versions no longer provide.
  self.assertEqual(median_ranks_1, 0)
  self.assertEqual(median_ranks_3, 0.5)
  self.assertEqual(median_ranks_3_bool, 0.5)
  self.assertEqual(median_ranks_5, 0.5)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -110,7 +110,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator): ...@@ -110,7 +110,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
evaluate_corlocs=False, evaluate_corlocs=False,
metric_prefix=None, metric_prefix=None,
use_weighted_mean_ap=False, use_weighted_mean_ap=False,
evaluate_masks=False): evaluate_masks=False,
group_of_weight=0.0):
"""Constructor. """Constructor.
Args: Args:
...@@ -128,6 +129,12 @@ class ObjectDetectionEvaluator(DetectionEvaluator): ...@@ -128,6 +129,12 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
of all classes. of all classes.
evaluate_masks: If False, evaluation will be performed based on boxes. evaluate_masks: If False, evaluation will be performed based on boxes.
If True, mask evaluation will be performed instead. If True, mask evaluation will be performed instead.
group_of_weight: Weight of group-of boxes. If set to 0, detections of the
correct class within a group-of box are ignored. If weight is > 0, then
if at least one detection falls within a group-of box with
matching_iou_threshold, weight group_of_weight is added to true
positives. Consequently, if no detection falls within a group-of box,
weight group_of_weight is added to false negatives.
Raises: Raises:
ValueError: If the category ids are not 1-indexed. ValueError: If the category ids are not 1-indexed.
...@@ -140,11 +147,13 @@ class ObjectDetectionEvaluator(DetectionEvaluator): ...@@ -140,11 +147,13 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
self._use_weighted_mean_ap = use_weighted_mean_ap self._use_weighted_mean_ap = use_weighted_mean_ap
self._label_id_offset = 1 self._label_id_offset = 1
self._evaluate_masks = evaluate_masks self._evaluate_masks = evaluate_masks
self._group_of_weight = group_of_weight
self._evaluation = ObjectDetectionEvaluation( self._evaluation = ObjectDetectionEvaluation(
num_groundtruth_classes=self._num_classes, num_groundtruth_classes=self._num_classes,
matching_iou_threshold=self._matching_iou_threshold, matching_iou_threshold=self._matching_iou_threshold,
use_weighted_mean_ap=self._use_weighted_mean_ap, use_weighted_mean_ap=self._use_weighted_mean_ap,
label_id_offset=self._label_id_offset) label_id_offset=self._label_id_offset,
group_of_weight=self._group_of_weight)
self._image_ids = set([]) self._image_ids = set([])
self._evaluate_corlocs = evaluate_corlocs self._evaluate_corlocs = evaluate_corlocs
self._metric_prefix = (metric_prefix + '_') if metric_prefix else '' self._metric_prefix = (metric_prefix + '_') if metric_prefix else ''
...@@ -383,7 +392,9 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator): ...@@ -383,7 +392,9 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
def __init__(self, def __init__(self,
categories, categories,
matching_iou_threshold=0.5, matching_iou_threshold=0.5,
evaluate_corlocs=False): evaluate_corlocs=False,
metric_prefix='OpenImagesV2',
group_of_weight=0.0):
"""Constructor. """Constructor.
Args: Args:
...@@ -393,12 +404,21 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator): ...@@ -393,12 +404,21 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
matching_iou_threshold: IOU threshold to use for matching groundtruth matching_iou_threshold: IOU threshold to use for matching groundtruth
boxes to detection boxes. boxes to detection boxes.
evaluate_corlocs: if True, additionally evaluates and returns CorLoc. evaluate_corlocs: if True, additionally evaluates and returns CorLoc.
metric_prefix: Prefix name of the metric.
group_of_weight: Weight of the group-of bounding box. If set to 0 (default
for Open Images V2 detection protocol), detections of the correct class
within a group-of box are ignored. If weight is > 0, then if at least
one detection falls within a group-of box with matching_iou_threshold,
weight group_of_weight is added to true positives. Consequently, if no
detection falls within a group-of box, weight group_of_weight is added
to false negatives.
""" """
super(OpenImagesDetectionEvaluator, self).__init__( super(OpenImagesDetectionEvaluator, self).__init__(
categories, categories,
matching_iou_threshold, matching_iou_threshold,
evaluate_corlocs, evaluate_corlocs,
metric_prefix='OpenImagesV2') metric_prefix=metric_prefix,
group_of_weight=group_of_weight)
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict): def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation. """Adds groundtruth for a single image to be used for evaluation.
...@@ -449,6 +469,130 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator): ...@@ -449,6 +469,130 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
self._image_ids.update([image_id]) self._image_ids.update([image_id])
class OpenImagesDetectionChallengeEvaluator(OpenImagesDetectionEvaluator):
  """Evaluator implementing the Open Images Challenge detection metrics.

  Compared with the Open Images V2 detection metric, the Challenge metric
  differs in two respects:
    - a custom weight may be specified for detecting an object contained in a
      group-of box;
    - verified image-level labels have to be provided explicitly for
      evaluation: when an image has neither a positive nor a negative
      image-level label for class c, all detections of class c on that image
      are ignored.
  """

  def __init__(self,
               categories,
               matching_iou_threshold=0.5,
               evaluate_corlocs=False,
               group_of_weight=1.0):
    """Constructor.

    Args:
      categories: A list of dicts, each of which has the following keys -
        'id': (required) an integer id uniquely identifying this category.
        'name': (required) string representing category name e.g., 'cat', 'dog'.
      matching_iou_threshold: IOU threshold to use for matching groundtruth
        boxes to detection boxes.
      evaluate_corlocs: if True, additionally evaluates and returns CorLoc.
      group_of_weight: Weight of a group-of box. If set to 0, detections of the
        correct class within a group-of box are ignored. If the weight is > 0
        (the default for Open Images Detection Challenge 2018), then if at
        least one detection falls within a group-of box with
        matching_iou_threshold, weight group_of_weight is added to true
        positives. Consequently, if no detection falls within a group-of box,
        weight group_of_weight is added to false negatives.
    """
    super(OpenImagesDetectionChallengeEvaluator, self).__init__(
        categories,
        matching_iou_threshold,
        evaluate_corlocs,
        metric_prefix='OpenImagesChallenge2018',
        group_of_weight=group_of_weight)
    # Maps image_id to the array of offset-corrected class ids whose
    # detections are kept for that image.
    self._evaluatable_labels = dict()

  def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
    """Adds groundtruth for a single image to be used for evaluation.

    After delegating to the parent class, records the classes that can be
    evaluated on this image: the union of the verified labels (when given) and
    the classes of the groundtruth boxes.

    Args:
      image_id: A unique string/integer identifier for the image.
      groundtruth_dict: A dictionary containing -
        standard_fields.InputDataFields.groundtruth_boxes: float32 numpy array
          of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of
          the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
        standard_fields.InputDataFields.groundtruth_classes: integer numpy array
          of shape [num_boxes] containing 1-indexed groundtruth classes for the
          boxes.
        standard_fields.InputDataFields.verified_labels: integer 1D numpy array
          containing all classes for which labels are verified. Treated as
          empty when the key is absent.
        standard_fields.InputDataFields.groundtruth_group_of: Optional length
          M numpy boolean array denoting whether a groundtruth box contains a
          group of instances.

    Raises:
      ValueError: On adding groundtruth for an image more than once
        (presumably raised by the parent implementation; nothing in this
        method raises it directly).
    """
    parent = super(OpenImagesDetectionChallengeEvaluator, self)
    parent.add_single_ground_truth_image_info(image_id, groundtruth_dict)

    input_fields = standard_fields.InputDataFields
    box_classes = (
        groundtruth_dict[input_fields.groundtruth_classes] -
        self._label_id_offset)
    verified_classes = (
        groundtruth_dict.get(input_fields.verified_labels,
                             np.array([], dtype=int)) -
        self._label_id_offset)
    # np.unique both de-duplicates and sorts the combined class ids.
    self._evaluatable_labels[image_id] = np.unique(
        np.concatenate((verified_classes, box_classes)))

  def add_single_detected_image_info(self, image_id, detections_dict):
    """Adds detections for a single image to be used for evaluation.

    Only detections whose class is among the evaluatable labels recorded for
    the image are forwarded to the underlying evaluation; all others are
    dropped.

    Args:
      image_id: A unique string/integer identifier for the image.
      detections_dict: A dictionary containing -
        standard_fields.DetectionResultFields.detection_boxes: float32 numpy
          array of shape [num_boxes, 4] containing `num_boxes` detection boxes
          of the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
        standard_fields.DetectionResultFields.detection_scores: float32 numpy
          array of shape [num_boxes] containing detection scores for the boxes.
        standard_fields.DetectionResultFields.detection_classes: integer numpy
          array of shape [num_boxes] containing 1-indexed detection classes for
          the boxes.
    """
    if image_id not in self._image_ids:
      # Groundtruth is expected to be added before detections. When it was
      # not, the image is registered with no evaluatable labels, so every
      # detection passed for it is filtered out below.
      self._image_ids.add(image_id)
      self._evaluatable_labels[image_id] = np.array([])

    result_fields = standard_fields.DetectionResultFields
    class_ids = (
        detections_dict[result_fields.detection_classes] -
        self._label_id_offset)
    # Index tuple selecting the detections whose class may be evaluated.
    keep = np.where(np.isin(class_ids, self._evaluatable_labels[image_id]))
    kept_class_ids = class_ids[keep]
    kept_boxes = detections_dict[result_fields.detection_boxes][keep]
    kept_scores = detections_dict[result_fields.detection_scores][keep]

    self._evaluation.add_single_detected_image_info(
        image_key=image_id,
        detected_boxes=kept_boxes,
        detected_scores=kept_scores,
        detected_class_labels=kept_class_ids)

  def clear(self):
    """Clears the parent's stored data and the per-image evaluatable labels."""
    super(OpenImagesDetectionChallengeEvaluator, self).clear()
    self._evaluatable_labels.clear()
ObjectDetectionEvalMetrics = collections.namedtuple( ObjectDetectionEvalMetrics = collections.namedtuple(
'ObjectDetectionEvalMetrics', [ 'ObjectDetectionEvalMetrics', [
'average_precisions', 'mean_ap', 'precisions', 'recalls', 'corlocs', 'average_precisions', 'mean_ap', 'precisions', 'recalls', 'corlocs',
...@@ -465,7 +609,8 @@ class ObjectDetectionEvaluation(object): ...@@ -465,7 +609,8 @@ class ObjectDetectionEvaluation(object):
nms_iou_threshold=1.0, nms_iou_threshold=1.0,
nms_max_output_boxes=10000, nms_max_output_boxes=10000,
use_weighted_mean_ap=False, use_weighted_mean_ap=False,
label_id_offset=0): label_id_offset=0,
group_of_weight=0.0):
if num_groundtruth_classes < 1: if num_groundtruth_classes < 1:
raise ValueError('Need at least 1 groundtruth class for evaluation.') raise ValueError('Need at least 1 groundtruth class for evaluation.')
...@@ -473,7 +618,9 @@ class ObjectDetectionEvaluation(object): ...@@ -473,7 +618,9 @@ class ObjectDetectionEvaluation(object):
num_groundtruth_classes=num_groundtruth_classes, num_groundtruth_classes=num_groundtruth_classes,
matching_iou_threshold=matching_iou_threshold, matching_iou_threshold=matching_iou_threshold,
nms_iou_threshold=nms_iou_threshold, nms_iou_threshold=nms_iou_threshold,
nms_max_output_boxes=nms_max_output_boxes) nms_max_output_boxes=nms_max_output_boxes,
group_of_weight=group_of_weight)
self.group_of_weight = group_of_weight
self.num_class = num_groundtruth_classes self.num_class = num_groundtruth_classes
self.use_weighted_mean_ap = use_weighted_mean_ap self.use_weighted_mean_ap = use_weighted_mean_ap
self.label_id_offset = label_id_offset self.label_id_offset = label_id_offset
...@@ -483,7 +630,7 @@ class ObjectDetectionEvaluation(object): ...@@ -483,7 +630,7 @@ class ObjectDetectionEvaluation(object):
self.groundtruth_masks = {} self.groundtruth_masks = {}
self.groundtruth_is_difficult_list = {} self.groundtruth_is_difficult_list = {}
self.groundtruth_is_group_of_list = {} self.groundtruth_is_group_of_list = {}
self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int) self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=float)
self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int) self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)
self._initialize_detections() self._initialize_detections()
...@@ -650,7 +797,10 @@ class ObjectDetectionEvaluation(object): ...@@ -650,7 +797,10 @@ class ObjectDetectionEvaluation(object):
num_gt_instances = np.sum(groundtruth_class_labels[ num_gt_instances = np.sum(groundtruth_class_labels[
~groundtruth_is_difficult_list ~groundtruth_is_difficult_list
& ~groundtruth_is_group_of_list] == class_index) & ~groundtruth_is_group_of_list] == class_index)
self.num_gt_instances_per_class[class_index] += num_gt_instances num_groupof_gt_instances = self.group_of_weight * np.sum(
groundtruth_class_labels[groundtruth_is_group_of_list] == class_index)
self.num_gt_instances_per_class[
class_index] += num_gt_instances + num_groupof_gt_instances
if np.any(groundtruth_class_labels == class_index): if np.any(groundtruth_class_labels == class_index):
self.num_gt_imgs_per_class[class_index] += 1 self.num_gt_imgs_per_class[class_index] += 1
...@@ -677,13 +827,12 @@ class ObjectDetectionEvaluation(object): ...@@ -677,13 +827,12 @@ class ObjectDetectionEvaluation(object):
if self.use_weighted_mean_ap: if self.use_weighted_mean_ap:
all_scores = np.array([], dtype=float) all_scores = np.array([], dtype=float)
all_tp_fp_labels = np.array([], dtype=bool) all_tp_fp_labels = np.array([], dtype=bool)
for class_index in range(self.num_class): for class_index in range(self.num_class):
if self.num_gt_instances_per_class[class_index] == 0: if self.num_gt_instances_per_class[class_index] == 0:
continue continue
if not self.scores_per_class[class_index]: if not self.scores_per_class[class_index]:
scores = np.array([], dtype=float) scores = np.array([], dtype=float)
tp_fp_labels = np.array([], dtype=bool) tp_fp_labels = np.array([], dtype=float)
else: else:
scores = np.concatenate(self.scores_per_class[class_index]) scores = np.concatenate(self.scores_per_class[class_index])
tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index]) tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index])
......
...@@ -100,6 +100,126 @@ class OpenImagesV2EvaluationTest(tf.test.TestCase): ...@@ -100,6 +100,126 @@ class OpenImagesV2EvaluationTest(tf.test.TestCase):
self.assertFalse(oiv2_evaluator._image_ids) self.assertFalse(oiv2_evaluator._image_ids)
class OpenImagesDetectionChallengeEvaluatorTest(tf.test.TestCase):
  """Metric check for OpenImagesDetectionChallengeEvaluator on three images."""

  def test_returns_correct_metric_values(self):
    """Feeds groundtruth and detections, then checks per-class AP and mAP."""
    input_fields = standard_fields.InputDataFields
    result_fields = standard_fields.DetectionResultFields
    categories = [
        {'id': 1, 'name': 'cat'},
        {'id': 2, 'name': 'dog'},
        {'id': 3, 'name': 'elephant'},
    ]
    evaluator = (
        object_detection_evaluation.OpenImagesDetectionChallengeEvaluator(
            categories, group_of_weight=0.5))

    # Groundtruth. Only img1 supplies verified image-level labels; img3 also
    # omits the group-of flags.
    evaluator.add_single_ground_truth_image_info(
        'img1', {
            input_fields.groundtruth_boxes:
                np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
                         dtype=float),
            input_fields.groundtruth_classes:
                np.array([1, 3, 1], dtype=int),
            input_fields.groundtruth_group_of:
                np.array([False, False, True], dtype=bool),
            input_fields.verified_labels:
                np.array([1, 2, 3], dtype=int),
        })
    evaluator.add_single_ground_truth_image_info(
        'img2', {
            input_fields.groundtruth_boxes:
                np.array([[10, 10, 11, 11], [500, 500, 510, 510],
                          [10, 10, 12, 12]],
                         dtype=float),
            input_fields.groundtruth_classes:
                np.array([1, 1, 3], dtype=int),
            input_fields.groundtruth_group_of:
                np.array([False, False, True], dtype=bool)
        })
    evaluator.add_single_ground_truth_image_info(
        'img3', {
            input_fields.groundtruth_boxes:
                np.array([[0, 0, 1, 1]], dtype=float),
            input_fields.groundtruth_classes:
                np.array([2], dtype=int)
        })

    # Detections, added in the same image order as the groundtruth.
    evaluator.add_single_detected_image_info(
        'img1', {
            result_fields.detection_boxes:
                np.array([[10, 10, 11, 11], [100, 100, 120, 120]],
                         dtype=float),
            result_fields.detection_scores:
                np.array([0.7, 0.8], dtype=float),
            result_fields.detection_classes:
                np.array([2, 2], dtype=int)
        })
    evaluator.add_single_detected_image_info(
        'img2', {
            result_fields.detection_boxes:
                np.array([[10, 10, 11, 11], [100, 100, 120, 120],
                          [100, 100, 220, 220], [10, 10, 11, 11]],
                         dtype=float),
            result_fields.detection_scores:
                np.array([0.7, 0.8, 0.5, 0.9], dtype=float),
            result_fields.detection_classes:
                np.array([1, 1, 2, 3], dtype=int)
        })
    evaluator.add_single_detected_image_info(
        'img3', {
            result_fields.detection_boxes:
                np.array([[0, 0, 1, 1]], dtype=float),
            result_fields.detection_scores:
                np.array([0.5], dtype=float),
            result_fields.detection_classes:
                np.array([2], dtype=int)
        })

    results = evaluator.evaluate()
    self.assertAlmostEqual(
        results['OpenImagesChallenge2018_PerformanceByCategory/AP@0.5IOU/dog'],
        0.3333333333)
    self.assertAlmostEqual(
        results[
            'OpenImagesChallenge2018_PerformanceByCategory/AP@0.5IOU/elephant'],
        0.333333333333)
    self.assertAlmostEqual(
        results['OpenImagesChallenge2018_PerformanceByCategory/AP@0.5IOU/cat'],
        0.142857142857)
    self.assertAlmostEqual(
        results['OpenImagesChallenge2018_Precision/mAP@0.5IOU'], 0.269841269)

    # clear() must leave no registered image ids behind.
    evaluator.clear()
    self.assertFalse(evaluator._image_ids)
class PascalEvaluationTest(tf.test.TestCase): class PascalEvaluationTest(tf.test.TestCase):
def test_returns_correct_metric_values_on_boxes(self): def test_returns_correct_metric_values_on_boxes(self):
......
...@@ -35,7 +35,8 @@ class PerImageEvaluation(object): ...@@ -35,7 +35,8 @@ class PerImageEvaluation(object):
num_groundtruth_classes, num_groundtruth_classes,
matching_iou_threshold=0.5, matching_iou_threshold=0.5,
nms_iou_threshold=0.3, nms_iou_threshold=0.3,
nms_max_output_boxes=50): nms_max_output_boxes=50,
group_of_weight=0.0):
"""Initialized PerImageEvaluation by evaluation parameters. """Initialized PerImageEvaluation by evaluation parameters.
Args: Args:
...@@ -44,24 +45,26 @@ class PerImageEvaluation(object): ...@@ -44,24 +45,26 @@ class PerImageEvaluation(object):
the threshold to consider whether a detection is true positive or not the threshold to consider whether a detection is true positive or not
nms_iou_threshold: IOU threshold used in Non Maximum Suppression. nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
nms_max_output_boxes: Number of maximum output boxes in NMS. nms_max_output_boxes: Number of maximum output boxes in NMS.
group_of_weight: Weight of the group-of boxes.
""" """
self.matching_iou_threshold = matching_iou_threshold self.matching_iou_threshold = matching_iou_threshold
self.nms_iou_threshold = nms_iou_threshold self.nms_iou_threshold = nms_iou_threshold
self.nms_max_output_boxes = nms_max_output_boxes self.nms_max_output_boxes = nms_max_output_boxes
self.num_groundtruth_classes = num_groundtruth_classes self.num_groundtruth_classes = num_groundtruth_classes
self.group_of_weight = group_of_weight
def compute_object_detection_metrics( def compute_object_detection_metrics(
self, detected_boxes, detected_scores, detected_class_labels, self, detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels, groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_list, groundtruth_is_group_of_list, groundtruth_is_difficult_list, groundtruth_is_group_of_list,
detected_masks=None, groundtruth_masks=None): detected_masks=None, groundtruth_masks=None):
"""Evaluates detections as being tp, fp or ignored from a single image. """Evaluates detections as being tp, fp or weighted from a single image.
The evaluation is done in two stages: The evaluation is done in two stages:
1. All detections are matched to non group-of boxes; true positives are 1. All detections are matched to non group-of boxes; true positives are
determined and detections matched to difficult boxes are ignored. determined and detections matched to difficult boxes are ignored.
2. Detections that are determined as false positives are matched against 2. Detections that are determined as false positives are matched against
group-of boxes and ignored if matched. group-of boxes and weighted if matched.
Args: Args:
detected_boxes: A float numpy array of shape [N, 4], representing N detected_boxes: A float numpy array of shape [N, 4], representing N
...@@ -339,7 +342,8 @@ class PerImageEvaluation(object): ...@@ -339,7 +342,8 @@ class PerImageEvaluation(object):
box_data=groundtruth_boxes[groundtruth_is_group_of_list], box_data=groundtruth_boxes[groundtruth_is_group_of_list],
mask_data=groundtruth_masks[groundtruth_is_group_of_list]) mask_data=groundtruth_masks[groundtruth_is_group_of_list])
iou = np_box_mask_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist) iou = np_box_mask_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist)
ioa = np_box_mask_list_ops.ioa(gt_group_of_boxlist, detected_boxlist) ioa = np.transpose(
np_box_mask_list_ops.ioa(gt_group_of_boxlist, detected_boxlist))
scores = detected_boxlist.get_field('scores') scores = detected_boxlist.get_field('scores')
num_boxes = detected_boxlist.num_boxes() num_boxes = detected_boxlist.num_boxes()
return iou, ioa, scores, num_boxes return iou, ioa, scores, num_boxes
...@@ -380,7 +384,8 @@ class PerImageEvaluation(object): ...@@ -380,7 +384,8 @@ class PerImageEvaluation(object):
gt_group_of_boxlist = np_box_list.BoxList( gt_group_of_boxlist = np_box_list.BoxList(
groundtruth_boxes[groundtruth_is_group_of_list]) groundtruth_boxes[groundtruth_is_group_of_list])
iou = np_box_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist) iou = np_box_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist)
ioa = np_box_list_ops.ioa(gt_group_of_boxlist, detected_boxlist) ioa = np.transpose(
np_box_list_ops.ioa(gt_group_of_boxlist, detected_boxlist))
scores = detected_boxlist.get_field('scores') scores = detected_boxlist.get_field('scores')
num_boxes = detected_boxlist.num_boxes() num_boxes = detected_boxlist.num_boxes()
return iou, ioa, scores, num_boxes return iou, ioa, scores, num_boxes
...@@ -455,7 +460,8 @@ class PerImageEvaluation(object): ...@@ -455,7 +460,8 @@ class PerImageEvaluation(object):
# 1. All detections are matched to non group-of boxes; true positives are # 1. All detections are matched to non group-of boxes; true positives are
# determined and detections matched to difficult boxes are ignored. # determined and detections matched to difficult boxes are ignored.
# 2. Detections that are determined as false positives are matched against # 2. Detections that are determined as false positives are matched against
# group-of boxes and ignored if matched. # group-of boxes and scored with weight w per ground truth box that is
# matched.
# Tp-fp evaluation for non-group of boxes (if any). # Tp-fp evaluation for non-group of boxes (if any).
if iou.shape[1] > 0: if iou.shape[1] > 0:
...@@ -473,18 +479,29 @@ class PerImageEvaluation(object): ...@@ -473,18 +479,29 @@ class PerImageEvaluation(object):
else: else:
is_matched_to_difficult_box[i] = True is_matched_to_difficult_box[i] = True
scores_group_of = np.zeros(ioa.shape[1], dtype=float)
tp_fp_labels_group_of = self.group_of_weight * np.ones(
ioa.shape[1], dtype=float)
# Tp-fp evaluation for group of boxes. # Tp-fp evaluation for group of boxes.
if ioa.shape[0] > 0: if ioa.shape[1] > 0:
max_overlap_group_of_gt = np.max(ioa, axis=0) max_overlap_group_of_gt_ids = np.argmax(ioa, axis=1)
for i in range(num_detected_boxes): for i in range(num_detected_boxes):
gt_id = max_overlap_group_of_gt_ids[i]
if (not tp_fp_labels[i] and not is_matched_to_difficult_box[i] and if (not tp_fp_labels[i] and not is_matched_to_difficult_box[i] and
max_overlap_group_of_gt[i] >= self.matching_iou_threshold): ioa[i, gt_id] >= self.matching_iou_threshold):
is_matched_to_group_of_box[i] = True is_matched_to_group_of_box[i] = True
scores_group_of[gt_id] = max(scores_group_of[gt_id], scores[i])
return scores[~is_matched_to_difficult_box selector = np.where((scores_group_of > 0) & (tp_fp_labels_group_of > 0))
& ~is_matched_to_group_of_box], tp_fp_labels[ scores_group_of = scores_group_of[selector]
~is_matched_to_difficult_box tp_fp_labels_group_of = tp_fp_labels_group_of[selector]
& ~is_matched_to_group_of_box]
return np.concatenate(
(scores[~is_matched_to_difficult_box
& ~is_matched_to_group_of_box],
scores_group_of)), np.concatenate(
(tp_fp_labels[~is_matched_to_difficult_box
& ~is_matched_to_group_of_box].astype(float),
tp_fp_labels_group_of))
def _get_ith_class_arrays(self, detected_boxes, detected_scores, def _get_ith_class_arrays(self, detected_boxes, detected_scores,
detected_masks, detected_class_labels, detected_masks, detected_class_labels,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment