Commit 6b72b5cd authored by Zhichao Lu's avatar Zhichao Lu Committed by pkulzc
Browse files

Merged commit includes the following changes:

191649512  by Zhichao Lu:

    Introduce two parameters in ssd.proto - freeze_batchnorm, inplace_batchnorm_update - and set up slim arg_scopes in ssd_meta_arch.py such that applies it to all batchnorm ops in the predict() method.

    This centralizes the control of freezing and doing inplace batchnorm updates.

--
191620303  by Zhichao Lu:

    Modifications to the preprocessor to support multiclass scores

--
191610773  by Zhichao Lu:

    Adding multiclass_scores to InputDataFields and adding padding for multiclass_scores.

--
191595011  by Zhichao Lu:

    Contains implementation of the detection metric for the Open Images Challenge.

--
191449408  by Zhichao Lu:

    Change hyperparams_builder to return a callable so the users can inherit values from outer arg_scopes. This allows us to easily set batch_norm parameters like "is_training" and "inplace_batchnorm_update" for all feature extractors from the base class and propagate it correctly to the nested scopes.

--
191437008  by Zhichao Lu:

    Contains implementation of the Recall@N and MedianRank@N metrics.

--
191385254  by Zhichao Lu:

    Add config rewrite flag to eval.py

--
191382500  by Zhichao Lu:

    Fix bug for config_util.

--

PiperOrigin-RevId: 191649512
parent 143464d2
......@@ -26,6 +26,10 @@ from object_detection.utils import test_case
class SsdFeatureExtractorTestBase(test_case.TestCase):
def conv_hyperparams_fn(self):
with tf.contrib.slim.arg_scope([]) as sc:
return sc
@abstractmethod
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
......
......@@ -33,12 +33,10 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
use_depthwise=False):
"""InceptionV2 Feature Extractor for SSD Models.
Args:
......@@ -47,25 +45,16 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDInceptionV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -82,7 +71,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs):
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......@@ -103,7 +92,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
'use_depthwise': self._use_depthwise,
}
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
with tf.variable_scope('InceptionV2',
reuse=self._reuse_weights) as scope:
_, image_features = inception_v2.inception_v2_base(
......
......@@ -25,7 +25,7 @@ class SsdInceptionV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True):
is_training=True):
"""Constructs a SsdInceptionV2FeatureExtractor.
Args:
......@@ -33,16 +33,14 @@ class SsdInceptionV2FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not
Returns:
an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor.
"""
min_depth = 32
conv_hyperparams = {}
return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable)
self.conv_hyperparams_fn)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
......
......@@ -33,12 +33,10 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
use_depthwise=False):
"""InceptionV3 Feature Extractor for SSD Models.
Args:
......@@ -47,25 +45,16 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDInceptionV3FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -82,7 +71,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs):
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......@@ -103,7 +92,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
'use_depthwise': self._use_depthwise,
}
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope:
_, image_features = inception_v3.inception_v3_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
......
......@@ -25,7 +25,7 @@ class SsdInceptionV3FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True):
is_training=True):
"""Constructs a SsdInceptionV3FeatureExtractor.
Args:
......@@ -33,16 +33,14 @@ class SsdInceptionV3FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not
Returns:
an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor.
"""
min_depth = 32
conv_hyperparams = {}
return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable)
self.conv_hyperparams_fn)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
......
......@@ -34,12 +34,10 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
use_depthwise=False):
"""MobileNetV1 Feature Extractor for SSD Models.
Args:
......@@ -48,26 +46,17 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -84,7 +73,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs):
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......@@ -109,8 +98,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(
is_training=(self._batch_norm_trainable and self._is_training))):
mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v1.mobilenet_v1_base(
......@@ -120,7 +108,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
feature_maps = feature_map_generators.multi_resolution_feature_maps(
......
......@@ -27,8 +27,7 @@ class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True,
use_explicit_padding=False):
is_training=True, use_explicit_padding=False):
"""Constructs a new feature extractor.
Args:
......@@ -36,8 +35,6 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
......@@ -45,11 +42,9 @@ class SsdMobilenetV1FeatureExtractorTest(
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc:
conv_hyperparams = sc
return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable=batch_norm_trainable,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self):
......
......@@ -35,12 +35,10 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
use_depthwise=False):
"""MobileNetV2 Feature Extractor for SSD Models.
Mobilenet v2 (experimental), designed by sandler@. More details can be found
......@@ -52,25 +50,16 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDMobileNetV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -87,7 +76,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs):
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......@@ -110,9 +99,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
with slim.arg_scope(
mobilenet_v2.training_scope(
is_training=(self._is_training and self._batch_norm_trainable),
bn_decay=0.9997)), \
mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
slim.arg_scope(
[mobilenet.depth_multiplier], min_depth=self._min_depth):
# TODO(b/68150321): Enable fused batch norm once quantization
......@@ -124,7 +111,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(b/68150321): Enable fused batch norm once quantization
# supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
......
......@@ -41,14 +41,12 @@ class SsdMobilenetV2FeatureExtractorTest(
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc:
conv_hyperparams = sc
return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor(
False,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self):
......
......@@ -36,15 +36,13 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
conv_hyperparams_fn,
resnet_base_fn,
resnet_scope_name,
fpn_scope_name,
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
use_depthwise=False):
"""SSD FPN feature extractor based on Resnet v1 architecture.
Args:
......@@ -54,32 +52,23 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
resnet_base_fn: base resnet network to use.
resnet_scope_name: scope name under which to construct resnet
fpn_scope_name: scope name under which to construct the feature pyramid
network.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
Raises:
ValueError: On supplying invalid arguments for unused arguments.
"""
super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, inplace_batchnorm_update)
conv_hyperparams_fn, reuse_weights, use_explicit_padding)
if self._depth_multiplier != 1.0:
raise ValueError('Only depth 1.0 is supported, found: {}'.
format(self._depth_multiplier))
......@@ -116,7 +105,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
filtered_image_features[feature_name] = feature
return filtered_image_features
def _extract_features(self, preprocessed_inputs):
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......@@ -143,7 +132,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
inputs=ops.pad_to_multiple(preprocessed_inputs,
self._pad_to_multiple),
num_classes=None,
is_training=self._is_training and self._batch_norm_trainable,
is_training=None,
global_pool=False,
output_stride=None,
store_non_strided_activations=True,
......@@ -151,7 +140,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
image_features = self._filter_features(image_features)
last_feature_map = image_features['block4']
with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
for i in range(5, 7):
last_feature_map = slim.conv2d(
last_feature_map,
......@@ -179,11 +168,9 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
use_depthwise=False):
"""Resnet50 v1 FPN Feature Extractor for SSD Models.
Args:
......@@ -193,25 +180,15 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding,
inplace_batchnorm_update)
reuse_weights, use_explicit_padding)
class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
......@@ -222,11 +199,9 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
use_depthwise=False):
"""Resnet101 v1 FPN Feature Extractor for SSD Models.
Args:
......@@ -236,25 +211,15 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding,
inplace_batchnorm_update)
reuse_weights, use_explicit_padding)
class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
......@@ -265,11 +230,9 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
use_depthwise=False):
"""Resnet152 v1 FPN Feature Extractor for SSD Models.
Args:
......@@ -279,22 +242,12 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
"""
super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
batch_norm_trainable, reuse_weights, use_explicit_padding,
inplace_batchnorm_update)
reuse_weights, use_explicit_padding)
......@@ -27,13 +27,10 @@ class SSDResnet50V1FeatureExtractorTest(
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True
return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable,
use_explicit_padding=use_explicit_padding)
self.conv_hyperparams_fn, use_explicit_padding=use_explicit_padding)
def _resnet_scope_name(self):
return 'resnet_v1_50'
......@@ -47,13 +44,14 @@ class SSDResnet101V1FeatureExtractorTest(
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True
return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding))
def _resnet_scope_name(self):
......@@ -68,13 +66,14 @@ class SSDResnet152V1FeatureExtractorTest(
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True
return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding))
def _resnet_scope_name(self):
......
......@@ -60,6 +60,21 @@ message Ssd {
// Loss configuration for training.
optional Loss loss = 11;
// Whether to update batch norm parameters during training or not.
// When training with a relative small batch size (e.g. 1), it is
// desirable to disable batch norm update and use pretrained batch norm
// params.
//
// Note: Some feature extractors are used with canned arg_scopes
// (e.g resnet arg scopes). In these cases training behavior of batch norm
// variables may depend on both values of `batch_norm_trainable` and
// `is_training`.
//
// When canned arg_scopes are used with feature extractors `conv_hyperparams`
// will apply only to the additional layers that are added and are outside the
// canned arg_scope.
optional bool freeze_batchnorm = 16 [default = false];
// Whether to update batch_norm inplace during training. This is required
// for batch norm to work correctly on TPUs. When this is false, user must add
// a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
......@@ -69,6 +84,8 @@ message Ssd {
message SsdFeatureExtractor {
reserved 6;
// Type of ssd feature extractor.
optional string type = 1;
......@@ -87,21 +104,6 @@ message SsdFeatureExtractor {
// until the resulting dimensions are even.
optional int32 pad_to_multiple = 5 [default = 1];
// Whether to update batch norm parameters during training or not.
// When training with a relative small batch size (e.g. 1), it is
// desirable to disable batch norm update and use pretrained batch norm
// params.
//
// Note: Some feature extractors are used with canned arg_scopes
// (e.g resnet arg scopes). In these cases training behavior of batch norm
// variables may depend on both values of `batch_norm_trainable` and
// `is_training`.
//
// When canned arg_scopes are used with feature extractors `conv_hyperparams`
// will apply only to the additional layers that are added and are outside the
// canned arg_scope.
optional bool batch_norm_trainable = 6 [default=true];
// Whether to use explicit padding when extracting SSD multiresolution
// features. Note that this does not apply to the base feature extractor.
optional bool use_explicit_padding = 7 [default=false];
......
......@@ -63,8 +63,10 @@ def get_spatial_image_size(image_resizer_config):
ValueError: If the model type is not recognized.
"""
if image_resizer_config.HasField("fixed_shape_resizer"):
return [image_resizer_config.fixed_shape_resizer.height,
image_resizer_config.fixed_shape_resizer.width]
return [
image_resizer_config.fixed_shape_resizer.height,
image_resizer_config.fixed_shape_resizer.width
]
if image_resizer_config.HasField("keep_aspect_ratio_resizer"):
if image_resizer_config.keep_aspect_ratio_resizer.pad_to_max_dimension:
return [image_resizer_config.keep_aspect_ratio_resizer.max_dimension] * 2
......@@ -74,7 +76,7 @@ def get_spatial_image_size(image_resizer_config):
def get_configs_from_pipeline_file(pipeline_config_path):
"""Reads configuration from a pipeline_pb2.TrainEvalPipelineConfig.
"""Reads config from a file containing pipeline_pb2.TrainEvalPipelineConfig.
Args:
pipeline_config_path: Path to pipeline_pb2.TrainEvalPipelineConfig text
......@@ -89,23 +91,34 @@ def get_configs_from_pipeline_file(pipeline_config_path):
with tf.gfile.GFile(pipeline_config_path, "r") as f:
proto_str = f.read()
text_format.Merge(proto_str, pipeline_config)
return create_configs_from_pipeline_proto(pipeline_config)
def create_configs_from_pipeline_proto(pipeline_config):
"""Creates a configs dictionary from pipeline_pb2.TrainEvalPipelineConfig.
Args:
pipeline_config: pipeline_pb2.TrainEvalPipelineConfig proto object.
Returns:
Dictionary of configuration objects. Keys are `model`, `train_config`,
`train_input_config`, `eval_config`, `eval_input_config`. Value are the
corresponding config objects.
"""
configs = {}
configs["model"] = pipeline_config.model
configs["train_config"] = pipeline_config.train_config
configs["train_input_config"] = pipeline_config.train_input_reader
configs["eval_config"] = pipeline_config.eval_config
configs["eval_input_config"] = pipeline_config.eval_input_reader
return configs
def create_pipeline_proto_from_configs(configs):
"""Creates a pipeline_pb2.TrainEvalPipelineConfig from configs dictionary.
This function nearly performs the inverse operation of
get_configs_from_pipeline_file(). Instead of returning a file path, it returns
a `TrainEvalPipelineConfig` object.
This function performs the inverse operation of
create_configs_from_pipeline_proto().
Args:
configs: Dictionary of configs. See get_configs_from_pipeline_file().
......@@ -437,7 +450,7 @@ def _get_classification_loss(model_config):
if meta_architecture == "faster_rcnn":
model = model_config.faster_rcnn
classification_loss = model.second_stage_classification_loss
if meta_architecture == "ssd":
elif meta_architecture == "ssd":
model = model_config.ssd
classification_loss = model.loss.classification_loss
else:
......
......@@ -93,6 +93,26 @@ class ConfigUtilTest(tf.test.TestCase):
self.assertProtoEquals(pipeline_config.eval_input_reader,
configs["eval_input_config"])
def test_create_configs_from_pipeline_proto(self):
"""Tests creating configs dictionary from pipeline proto."""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.model.faster_rcnn.num_classes = 10
pipeline_config.train_config.batch_size = 32
pipeline_config.train_input_reader.label_map_path = "path/to/label_map"
pipeline_config.eval_config.num_examples = 20
pipeline_config.eval_input_reader.queue_capacity = 100
configs = config_util.create_configs_from_pipeline_proto(pipeline_config)
self.assertProtoEquals(pipeline_config.model, configs["model"])
self.assertProtoEquals(pipeline_config.train_config,
configs["train_config"])
self.assertProtoEquals(pipeline_config.train_input_reader,
configs["train_input_config"])
self.assertProtoEquals(pipeline_config.eval_config, configs["eval_config"])
self.assertProtoEquals(pipeline_config.eval_input_reader,
configs["eval_input_config"])
def test_create_pipeline_proto_from_configs(self):
"""Tests that proto can be reconstructed from configs dictionary."""
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
......
......@@ -34,7 +34,8 @@ def _validate_label_map(label_map):
for item in label_map.item:
if item.id < 0:
raise ValueError('Label map ids should be >= 0.')
if item.id == 0 and item.name != 'background':
if (item.id == 0 and item.name != 'background' and
item.display_name != 'background'):
raise ValueError('Label map id 0 is reserved for the background label')
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for computing metrics like precision, recall, CorLoc and etc."""
from __future__ import division
......@@ -24,7 +23,7 @@ def compute_precision_recall(scores, labels, num_gt):
Args:
scores: A float numpy array representing detection score
labels: A boolean numpy array representing true/false positive labels
labels: A float numpy array representing weighted true/false positive labels
num_gt: Number of ground truth instances
Raises:
......@@ -37,12 +36,13 @@ def compute_precision_recall(scores, labels, num_gt):
This value is None if no ground truth labels are present.
"""
if not isinstance(
labels, np.ndarray) or labels.dtype != np.bool or len(labels.shape) != 1:
raise ValueError("labels must be single dimension bool numpy array")
if not isinstance(labels, np.ndarray) or len(labels.shape) != 1:
raise ValueError("labels must be single dimension numpy array")
if labels.dtype != np.float and labels.dtype != np.bool:
raise ValueError("labels type must be either bool or float")
if not isinstance(
scores, np.ndarray) or len(scores.shape) != 1:
if not isinstance(scores, np.ndarray) or len(scores.shape) != 1:
raise ValueError("scores must be single dimension numpy array")
if num_gt < np.sum(labels):
......@@ -56,9 +56,8 @@ def compute_precision_recall(scores, labels, num_gt):
sorted_indices = np.argsort(scores)
sorted_indices = sorted_indices[::-1]
labels = labels.astype(int)
true_positive_labels = labels[sorted_indices]
false_positive_labels = 1 - true_positive_labels
false_positive_labels = (true_positive_labels <= 0).astype(float)
cum_true_positives = np.cumsum(true_positive_labels)
cum_false_positives = np.cumsum(false_positive_labels)
precision = cum_true_positives.astype(float) / (
......@@ -90,8 +89,8 @@ def compute_average_precision(precision, recall):
raise ValueError("If precision is None, recall must also be None")
return np.NAN
if not isinstance(precision, np.ndarray) or not isinstance(recall,
np.ndarray):
if not isinstance(precision, np.ndarray) or not isinstance(
recall, np.ndarray):
raise ValueError("precision and recall must be numpy array")
if precision.dtype != np.float or recall.dtype != np.float:
raise ValueError("input must be float numpy array.")
......@@ -139,6 +138,53 @@ def compute_cor_loc(num_gt_imgs_per_class,
class
"""
return np.where(
num_gt_imgs_per_class == 0,
np.nan,
num_gt_imgs_per_class == 0, np.nan,
num_images_correctly_detected_per_class / num_gt_imgs_per_class)
def compute_median_rank_at_k(tp_fp_list, k):
"""Computes MedianRank@k, where k is the top-scoring labels.
Args:
tp_fp_list: a list of numpy arrays; each numpy array corresponds to the all
detection on a single image, where the detections are sorted by score in
descending order. Further, each numpy array element can have boolean or
float values. True positive elements have either value >0.0 or True;
any other value is considered false positive.
k: number of top-scoring proposals to take.
Returns:
median_rank: median rank of all true positive proposals among top k by
score.
"""
ranks = []
for i in range(len(tp_fp_list)):
ranks.append(
np.where(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])] > 0)[0])
concatenated_ranks = np.concatenate(ranks)
return np.median(concatenated_ranks)
def compute_recall_at_k(tp_fp_list, num_gt, k):
"""Computes Recall@k, MedianRank@k, where k is the top-scoring labels.
Args:
tp_fp_list: a list of numpy arrays; each numpy array corresponds to the all
detection on a single image, where the detections are sorted by score in
descending order. Further, each numpy array element can have boolean or
float values. True positive elements have either value >0.0 or True;
any other value is considered false positive.
num_gt: number of groundtruth anotations.
k: number of top-scoring proposals to take.
Returns:
recall: recall evaluated on the top k by score detections.
"""
tp_fp_eval = []
for i in range(len(tp_fp_list)):
tp_fp_eval.append(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])])
tp_fp_eval = np.concatenate(tp_fp_eval)
return np.sum(tp_fp_eval) / num_gt
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.metrics."""
import numpy as np
......@@ -25,8 +24,8 @@ class MetricsTest(tf.test.TestCase):
def test_compute_cor_loc(self):
num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int)
num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0],
dtype=int)
num_images_correctly_detected_per_class = np.array(
[10, 0, 1, 0, 0], dtype=int)
corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
num_images_correctly_detected_per_class)
expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float)
......@@ -34,8 +33,8 @@ class MetricsTest(tf.test.TestCase):
def test_compute_cor_loc_nans(self):
num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int)
num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0],
dtype=int)
num_images_correctly_detected_per_class = np.array(
[10, 0, 1, 0, 0], dtype=int)
corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
num_images_correctly_detected_per_class)
expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float)
......@@ -45,18 +44,37 @@ class MetricsTest(tf.test.TestCase):
num_gt = 10
scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
labels_float_type = np.array([0, 1, 1, 0, 0, 1], dtype=float)
accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float)
expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6])
expected_recall = accumulated_tp_count / num_gt
precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
precision_float_type, recall_float_type = metrics.compute_precision_recall(
scores, labels_float_type, num_gt)
self.assertAllClose(precision, expected_precision)
self.assertAllClose(recall, expected_recall)
self.assertAllClose(precision_float_type, expected_precision)
self.assertAllClose(recall_float_type, expected_recall)
def test_compute_precision_recall_float(self):
num_gt = 10
scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
labels_float = np.array([0, 1, 1, 0.5, 0, 1], dtype=float)
expected_precision = np.array(
[0., 0.5, 0.33333333, 0.5, 0.55555556, 0.63636364], dtype=float)
expected_recall = np.array([0., 0.1, 0.1, 0.2, 0.25, 0.35], dtype=float)
precision, recall = metrics.compute_precision_recall(
scores, labels_float, num_gt)
self.assertAllClose(precision, expected_precision)
self.assertAllClose(recall, expected_recall)
def test_compute_average_precision(self):
precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0], dtype=float)
recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5], dtype=float)
processed_precision = np.array([0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0],
dtype=float)
processed_precision = np.array(
[0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0], dtype=float)
recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0], dtype=float)
expected_mean_ap = np.sum(recall_interval * processed_precision)
mean_ap = metrics.compute_average_precision(precision, recall)
......@@ -74,6 +92,52 @@ class MetricsTest(tf.test.TestCase):
ap = metrics.compute_average_precision(precision, recall)
self.assertTrue(np.isnan(ap))
def test_compute_recall_at_k(self):
num_gt = 4
tp_fp = [
np.array([1, 0, 0], dtype=float),
np.array([0, 1], dtype=float),
np.array([0, 0, 0, 0, 0], dtype=float)
]
tp_fp_bool = [
np.array([True, False, False], dtype=bool),
np.array([False, True], dtype=float),
np.array([False, False, False, False, False], dtype=float)
]
recall_1 = metrics.compute_recall_at_k(tp_fp, num_gt, 1)
recall_3 = metrics.compute_recall_at_k(tp_fp, num_gt, 3)
recall_5 = metrics.compute_recall_at_k(tp_fp, num_gt, 5)
recall_3_bool = metrics.compute_recall_at_k(tp_fp_bool, num_gt, 3)
self.assertAlmostEqual(recall_1, 0.25)
self.assertAlmostEqual(recall_3, 0.5)
self.assertAlmostEqual(recall_3_bool, 0.5)
self.assertAlmostEqual(recall_5, 0.5)
def test_compute_median_rank_at_k(self):
tp_fp = [
np.array([1, 0, 0], dtype=float),
np.array([0, 0.1], dtype=float),
np.array([0, 0, 0, 0, 0], dtype=float)
]
tp_fp_bool = [
np.array([True, False, False], dtype=bool),
np.array([False, True], dtype=float),
np.array([False, False, False, False, False], dtype=float)
]
median_ranks_1 = metrics.compute_median_rank_at_k(tp_fp, 1)
median_ranks_3 = metrics.compute_median_rank_at_k(tp_fp, 3)
median_ranks_5 = metrics.compute_median_rank_at_k(tp_fp, 5)
median_ranks_3_bool = metrics.compute_median_rank_at_k(tp_fp_bool, 3)
self.assertEquals(median_ranks_1, 0)
self.assertEquals(median_ranks_3, 0.5)
self.assertEquals(median_ranks_3_bool, 0.5)
self.assertEquals(median_ranks_5, 0.5)
if __name__ == '__main__':
tf.test.main()
......@@ -110,7 +110,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
evaluate_corlocs=False,
metric_prefix=None,
use_weighted_mean_ap=False,
evaluate_masks=False):
evaluate_masks=False,
group_of_weight=0.0):
"""Constructor.
Args:
......@@ -128,6 +129,12 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
of all classes.
evaluate_masks: If False, evaluation will be performed based on boxes.
If True, mask evaluation will be performed instead.
group_of_weight: Weight of group-of boxes.If set to 0, detections of the
correct class within a group-of box are ignored. If weight is > 0, then
if at least one detection falls within a group-of box with
matching_iou_threshold, weight group_of_weight is added to true
positives. Consequently, if no detection falls within a group-of box,
weight group_of_weight is added to false negatives.
Raises:
ValueError: If the category ids are not 1-indexed.
......@@ -140,11 +147,13 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
self._use_weighted_mean_ap = use_weighted_mean_ap
self._label_id_offset = 1
self._evaluate_masks = evaluate_masks
self._group_of_weight = group_of_weight
self._evaluation = ObjectDetectionEvaluation(
num_groundtruth_classes=self._num_classes,
matching_iou_threshold=self._matching_iou_threshold,
use_weighted_mean_ap=self._use_weighted_mean_ap,
label_id_offset=self._label_id_offset)
label_id_offset=self._label_id_offset,
group_of_weight=self._group_of_weight)
self._image_ids = set([])
self._evaluate_corlocs = evaluate_corlocs
self._metric_prefix = (metric_prefix + '_') if metric_prefix else ''
......@@ -383,7 +392,9 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
def __init__(self,
categories,
matching_iou_threshold=0.5,
evaluate_corlocs=False):
evaluate_corlocs=False,
metric_prefix='OpenImagesV2',
group_of_weight=0.0):
"""Constructor.
Args:
......@@ -393,12 +404,21 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
matching_iou_threshold: IOU threshold to use for matching groundtruth
boxes to detection boxes.
evaluate_corlocs: if True, additionally evaluates and returns CorLoc.
metric_prefix: Prefix name of the metric.
group_of_weight: Weight of the group-of bounding box. If set to 0 (default
for Open Images V2 detection protocol), detections of the correct class
within a group-of box are ignored. If weight is > 0, then if at least
one detection falls within a group-of box with matching_iou_threshold,
weight group_of_weight is added to true positives. Consequently, if no
detection falls within a group-of box, weight group_of_weight is added
to false negatives.
"""
super(OpenImagesDetectionEvaluator, self).__init__(
categories,
matching_iou_threshold,
evaluate_corlocs,
metric_prefix='OpenImagesV2')
metric_prefix=metric_prefix,
group_of_weight=group_of_weight)
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation.
......@@ -449,6 +469,130 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
self._image_ids.update([image_id])
class OpenImagesDetectionChallengeEvaluator(OpenImagesDetectionEvaluator):
"""A class implements Open Images Challenge Detection metrics.
Open Images Challenge Detection metric has two major changes in comparison
with Open Images V2 detection metric:
- a custom weight might be specified for detecting an object contained in
a group-of box.
- verified image-level labels should be explicitelly provided for
evaluation: in case in image has neither positive nor negative image level
label of class c, all detections of this class on this image will be
ignored.
"""
def __init__(self,
categories,
matching_iou_threshold=0.5,
evaluate_corlocs=False,
group_of_weight=1.0):
"""Constructor.
Args:
categories: A list of dicts, each of which has the following keys -
'id': (required) an integer id uniquely identifying this category.
'name': (required) string representing category name e.g., 'cat', 'dog'.
matching_iou_threshold: IOU threshold to use for matching groundtruth
boxes to detection boxes.
evaluate_corlocs: if True, additionally evaluates and returns CorLoc.
group_of_weight: weight of a group-of box. If set to 0, detections of the
correct class within a group-of box are ignored. If weight is > 0
(default for Open Images Detection Challenge 2018), then if at least one
detection falls within a group-of box with matching_iou_threshold,
weight group_of_weight is added to true positives. Consequently, if no
detection falls within a group-of box, weight group_of_weight is added
to false negatives.
"""
super(OpenImagesDetectionChallengeEvaluator, self).__init__(
categories,
matching_iou_threshold,
evaluate_corlocs,
metric_prefix='OpenImagesChallenge2018',
group_of_weight=group_of_weight)
self._evaluatable_labels = {}
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation.
Args:
image_id: A unique string/integer identifier for the image.
groundtruth_dict: A dictionary containing -
standard_fields.InputDataFields.groundtruth_boxes: float32 numpy array
of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of
the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
standard_fields.InputDataFields.groundtruth_classes: integer numpy array
of shape [num_boxes] containing 1-indexed groundtruth classes for the
boxes.
standard_fields.InputDataFields.verified_labels: integer 1D numpy array
containing all classes for which labels are verified.
standard_fields.InputDataFields.groundtruth_group_of: Optional length
M numpy boolean array denoting whether a groundtruth box contains a
group of instances.
Raises:
ValueError: On adding groundtruth for an image more than once.
"""
super(OpenImagesDetectionChallengeEvaluator,
self).add_single_ground_truth_image_info(image_id, groundtruth_dict)
groundtruth_classes = (
groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] -
self._label_id_offset)
self._evaluatable_labels[image_id] = np.unique(
np.concatenate(((groundtruth_dict.get(
standard_fields.InputDataFields.verified_labels,
np.array([], dtype=int)) - self._label_id_offset),
groundtruth_classes)))
def add_single_detected_image_info(self, image_id, detections_dict):
"""Adds detections for a single image to be used for evaluation.
Args:
image_id: A unique string/integer identifier for the image.
detections_dict: A dictionary containing -
standard_fields.DetectionResultFields.detection_boxes: float32 numpy
array of shape [num_boxes, 4] containing `num_boxes` detection boxes
of the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
standard_fields.DetectionResultFields.detection_scores: float32 numpy
array of shape [num_boxes] containing detection scores for the boxes.
standard_fields.DetectionResultFields.detection_classes: integer numpy
array of shape [num_boxes] containing 1-indexed detection classes for
the boxes.
Raises:
ValueError: If detection masks are not in detections dictionary.
"""
if image_id not in self._image_ids:
# Since for the correct work of evaluator it is assumed that groundtruth
# is inserted first we make sure to break the code if is it not the case.
self._image_ids.update([image_id])
self._evaluatable_labels[image_id] = np.array([])
detection_classes = (
detections_dict[standard_fields.DetectionResultFields.detection_classes]
- self._label_id_offset)
allowed_classes = np.where(
np.isin(detection_classes, self._evaluatable_labels[image_id]))
detection_classes = detection_classes[allowed_classes]
detected_boxes = detections_dict[
standard_fields.DetectionResultFields.detection_boxes][allowed_classes]
detected_scores = detections_dict[
standard_fields.DetectionResultFields.detection_scores][allowed_classes]
self._evaluation.add_single_detected_image_info(
image_key=image_id,
detected_boxes=detected_boxes,
detected_scores=detected_scores,
detected_class_labels=detection_classes)
def clear(self):
"""Clears stored data."""
super(OpenImagesDetectionChallengeEvaluator, self).clear()
self._evaluatable_labels.clear()
ObjectDetectionEvalMetrics = collections.namedtuple(
'ObjectDetectionEvalMetrics', [
'average_precisions', 'mean_ap', 'precisions', 'recalls', 'corlocs',
......@@ -465,7 +609,8 @@ class ObjectDetectionEvaluation(object):
nms_iou_threshold=1.0,
nms_max_output_boxes=10000,
use_weighted_mean_ap=False,
label_id_offset=0):
label_id_offset=0,
group_of_weight=0.0):
if num_groundtruth_classes < 1:
raise ValueError('Need at least 1 groundtruth class for evaluation.')
......@@ -473,7 +618,9 @@ class ObjectDetectionEvaluation(object):
num_groundtruth_classes=num_groundtruth_classes,
matching_iou_threshold=matching_iou_threshold,
nms_iou_threshold=nms_iou_threshold,
nms_max_output_boxes=nms_max_output_boxes)
nms_max_output_boxes=nms_max_output_boxes,
group_of_weight=group_of_weight)
self.group_of_weight = group_of_weight
self.num_class = num_groundtruth_classes
self.use_weighted_mean_ap = use_weighted_mean_ap
self.label_id_offset = label_id_offset
......@@ -483,7 +630,7 @@ class ObjectDetectionEvaluation(object):
self.groundtruth_masks = {}
self.groundtruth_is_difficult_list = {}
self.groundtruth_is_group_of_list = {}
self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=float)
self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)
self._initialize_detections()
......@@ -650,7 +797,10 @@ class ObjectDetectionEvaluation(object):
num_gt_instances = np.sum(groundtruth_class_labels[
~groundtruth_is_difficult_list
& ~groundtruth_is_group_of_list] == class_index)
self.num_gt_instances_per_class[class_index] += num_gt_instances
num_groupof_gt_instances = self.group_of_weight * np.sum(
groundtruth_class_labels[groundtruth_is_group_of_list] == class_index)
self.num_gt_instances_per_class[
class_index] += num_gt_instances + num_groupof_gt_instances
if np.any(groundtruth_class_labels == class_index):
self.num_gt_imgs_per_class[class_index] += 1
......@@ -677,13 +827,12 @@ class ObjectDetectionEvaluation(object):
if self.use_weighted_mean_ap:
all_scores = np.array([], dtype=float)
all_tp_fp_labels = np.array([], dtype=bool)
for class_index in range(self.num_class):
if self.num_gt_instances_per_class[class_index] == 0:
continue
if not self.scores_per_class[class_index]:
scores = np.array([], dtype=float)
tp_fp_labels = np.array([], dtype=bool)
tp_fp_labels = np.array([], dtype=float)
else:
scores = np.concatenate(self.scores_per_class[class_index])
tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index])
......
......@@ -100,6 +100,126 @@ class OpenImagesV2EvaluationTest(tf.test.TestCase):
self.assertFalse(oiv2_evaluator._image_ids)
class OpenImagesDetectionChallengeEvaluatorTest(tf.test.TestCase):
def test_returns_correct_metric_values(self):
categories = [{
'id': 1,
'name': 'cat'
}, {
'id': 2,
'name': 'dog'
}, {
'id': 3,
'name': 'elephant'
}]
oivchallenge_evaluator = (
object_detection_evaluation.OpenImagesDetectionChallengeEvaluator(
categories, group_of_weight=0.5))
image_key = 'img1'
groundtruth_boxes = np.array(
[[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], dtype=float)
groundtruth_class_labels = np.array([1, 3, 1], dtype=int)
groundtruth_is_group_of_list = np.array([False, False, True], dtype=bool)
groundtruth_verified_labels = np.array([1, 2, 3], dtype=int)
oivchallenge_evaluator.add_single_ground_truth_image_info(
image_key, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels,
standard_fields.InputDataFields.groundtruth_group_of:
groundtruth_is_group_of_list,
standard_fields.InputDataFields.verified_labels:
groundtruth_verified_labels,
})
image_key = 'img2'
groundtruth_boxes = np.array(
[[10, 10, 11, 11], [500, 500, 510, 510], [10, 10, 12, 12]], dtype=float)
groundtruth_class_labels = np.array([1, 1, 3], dtype=int)
groundtruth_is_group_of_list = np.array([False, False, True], dtype=bool)
oivchallenge_evaluator.add_single_ground_truth_image_info(
image_key, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels,
standard_fields.InputDataFields.groundtruth_group_of:
groundtruth_is_group_of_list
})
image_key = 'img3'
groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_class_labels = np.array([2], dtype=int)
oivchallenge_evaluator.add_single_ground_truth_image_info(
image_key, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels
})
image_key = 'img1'
detected_boxes = np.array(
[[10, 10, 11, 11], [100, 100, 120, 120]], dtype=float)
detected_class_labels = np.array([2, 2], dtype=int)
detected_scores = np.array([0.7, 0.8], dtype=float)
oivchallenge_evaluator.add_single_detected_image_info(
image_key, {
standard_fields.DetectionResultFields.detection_boxes:
detected_boxes,
standard_fields.DetectionResultFields.detection_scores:
detected_scores,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels
})
image_key = 'img2'
detected_boxes = np.array(
[[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220],
[10, 10, 11, 11]],
dtype=float)
detected_class_labels = np.array([1, 1, 2, 3], dtype=int)
detected_scores = np.array([0.7, 0.8, 0.5, 0.9], dtype=float)
oivchallenge_evaluator.add_single_detected_image_info(
image_key, {
standard_fields.DetectionResultFields.detection_boxes:
detected_boxes,
standard_fields.DetectionResultFields.detection_scores:
detected_scores,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels
})
image_key = 'img3'
detected_boxes = np.array([[0, 0, 1, 1]], dtype=float)
detected_class_labels = np.array([2], dtype=int)
detected_scores = np.array([0.5], dtype=float)
oivchallenge_evaluator.add_single_detected_image_info(
image_key, {
standard_fields.DetectionResultFields.detection_boxes:
detected_boxes,
standard_fields.DetectionResultFields.detection_scores:
detected_scores,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels
})
metrics = oivchallenge_evaluator.evaluate()
self.assertAlmostEqual(
metrics['OpenImagesChallenge2018_PerformanceByCategory/AP@0.5IOU/dog'],
0.3333333333)
self.assertAlmostEqual(
metrics[
'OpenImagesChallenge2018_PerformanceByCategory/AP@0.5IOU/elephant'],
0.333333333333)
self.assertAlmostEqual(
metrics['OpenImagesChallenge2018_PerformanceByCategory/AP@0.5IOU/cat'],
0.142857142857)
self.assertAlmostEqual(
metrics['OpenImagesChallenge2018_Precision/mAP@0.5IOU'], 0.269841269)
oivchallenge_evaluator.clear()
self.assertFalse(oivchallenge_evaluator._image_ids)
class PascalEvaluationTest(tf.test.TestCase):
def test_returns_correct_metric_values_on_boxes(self):
......
......@@ -35,7 +35,8 @@ class PerImageEvaluation(object):
num_groundtruth_classes,
matching_iou_threshold=0.5,
nms_iou_threshold=0.3,
nms_max_output_boxes=50):
nms_max_output_boxes=50,
group_of_weight=0.0):
"""Initialized PerImageEvaluation by evaluation parameters.
Args:
......@@ -44,24 +45,26 @@ class PerImageEvaluation(object):
the threshold to consider whether a detection is true positive or not
nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
nms_max_output_boxes: Number of maximum output boxes in NMS.
group_of_weight: Weight of the group-of boxes.
"""
self.matching_iou_threshold = matching_iou_threshold
self.nms_iou_threshold = nms_iou_threshold
self.nms_max_output_boxes = nms_max_output_boxes
self.num_groundtruth_classes = num_groundtruth_classes
self.group_of_weight = group_of_weight
def compute_object_detection_metrics(
self, detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_list, groundtruth_is_group_of_list,
detected_masks=None, groundtruth_masks=None):
"""Evaluates detections as being tp, fp or ignored from a single image.
"""Evaluates detections as being tp, fp or weighted from a single image.
The evaluation is done in two stages:
1. All detections are matched to non group-of boxes; true positives are
determined and detections matched to difficult boxes are ignored.
2. Detections that are determined as false positives are matched against
group-of boxes and ignored if matched.
group-of boxes and weighted if matched.
Args:
detected_boxes: A float numpy array of shape [N, 4], representing N
......@@ -339,7 +342,8 @@ class PerImageEvaluation(object):
box_data=groundtruth_boxes[groundtruth_is_group_of_list],
mask_data=groundtruth_masks[groundtruth_is_group_of_list])
iou = np_box_mask_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist)
ioa = np_box_mask_list_ops.ioa(gt_group_of_boxlist, detected_boxlist)
ioa = np.transpose(
np_box_mask_list_ops.ioa(gt_group_of_boxlist, detected_boxlist))
scores = detected_boxlist.get_field('scores')
num_boxes = detected_boxlist.num_boxes()
return iou, ioa, scores, num_boxes
......@@ -380,7 +384,8 @@ class PerImageEvaluation(object):
gt_group_of_boxlist = np_box_list.BoxList(
groundtruth_boxes[groundtruth_is_group_of_list])
iou = np_box_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist)
ioa = np_box_list_ops.ioa(gt_group_of_boxlist, detected_boxlist)
ioa = np.transpose(
np_box_list_ops.ioa(gt_group_of_boxlist, detected_boxlist))
scores = detected_boxlist.get_field('scores')
num_boxes = detected_boxlist.num_boxes()
return iou, ioa, scores, num_boxes
......@@ -455,7 +460,8 @@ class PerImageEvaluation(object):
# 1. All detections are matched to non group-of boxes; true positives are
# determined and detections matched to difficult boxes are ignored.
# 2. Detections that are determined as false positives are matched against
# group-of boxes and ignored if matched.
# group-of boxes and scored with weight w per ground truth box is
# matched.
# Tp-fp evaluation for non-group of boxes (if any).
if iou.shape[1] > 0:
......@@ -473,18 +479,29 @@ class PerImageEvaluation(object):
else:
is_matched_to_difficult_box[i] = True
scores_group_of = np.zeros(ioa.shape[1], dtype=float)
tp_fp_labels_group_of = self.group_of_weight * np.ones(
ioa.shape[1], dtype=float)
# Tp-fp evaluation for group of boxes.
if ioa.shape[0] > 0:
max_overlap_group_of_gt = np.max(ioa, axis=0)
if ioa.shape[1] > 0:
max_overlap_group_of_gt_ids = np.argmax(ioa, axis=1)
for i in range(num_detected_boxes):
gt_id = max_overlap_group_of_gt_ids[i]
if (not tp_fp_labels[i] and not is_matched_to_difficult_box[i] and
max_overlap_group_of_gt[i] >= self.matching_iou_threshold):
ioa[i, gt_id] >= self.matching_iou_threshold):
is_matched_to_group_of_box[i] = True
return scores[~is_matched_to_difficult_box
& ~is_matched_to_group_of_box], tp_fp_labels[
~is_matched_to_difficult_box
& ~is_matched_to_group_of_box]
scores_group_of[gt_id] = max(scores_group_of[gt_id], scores[i])
selector = np.where((scores_group_of > 0) & (tp_fp_labels_group_of > 0))
scores_group_of = scores_group_of[selector]
tp_fp_labels_group_of = tp_fp_labels_group_of[selector]
return np.concatenate(
(scores[~is_matched_to_difficult_box
& ~is_matched_to_group_of_box],
scores_group_of)), np.concatenate(
(tp_fp_labels[~is_matched_to_difficult_box
& ~is_matched_to_group_of_box].astype(float),
tp_fp_labels_group_of))
def _get_ith_class_arrays(self, detected_boxes, detected_scores,
detected_masks, detected_class_labels,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment