Commit ed4e22b8 (unverified), authored Apr 16, 2018 by pkulzc; committed by GitHub on Apr 16, 2018

Merge pull request #3973 from pkulzc/master

Object detection internal changes

Parents: cac90a0e, 13b89b93
Changes: 61 in total; showing 20 changed files with 1,017 additions and 168 deletions (+1017, -168).
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py        +67   -80
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_test.py   +11   -12
research/object_detection/protos/box_predictor.proto                           +1    -0
research/object_detection/protos/ssd.proto                                     +25   -15
research/object_detection/protos/train.proto                                   +8    -1
research/object_detection/samples/configs/ssd_inception_v2_coco.config         +1    -0
research/object_detection/samples/configs/ssd_inception_v2_pets.config         +1    -0
research/object_detection/samples/configs/ssd_inception_v3_pets.config         +1    -0
research/object_detection/trainer.py                                           +34   -5
research/object_detection/trainer_test.py                                      +48   -1
research/object_detection/utils/config_util.py                                 +21   -8
research/object_detection/utils/config_util_test.py                            +20   -0
research/object_detection/utils/label_map_util.py                              +2    -1
research/object_detection/utils/metrics.py                                     +59   -13
research/object_detection/utils/metrics_test.py                                +71   -7
research/object_detection/utils/object_detection_evaluation.py                 +159  -10
research/object_detection/utils/object_detection_evaluation_test.py            +120  -0
research/object_detection/utils/per_image_evaluation.py                        +31   -14
research/object_detection/utils/per_image_evaluation_test.py                   +119  -1
research/object_detection/utils/per_image_vrd_evaluation.py                    +218  -0
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py

@@ -21,6 +21,7 @@ import tensorflow as tf
 from object_detection.meta_architectures import ssd_meta_arch
 from object_detection.models import feature_map_generators
+from object_detection.utils import context_manager
 from object_detection.utils import ops
 from object_detection.utils import shape_utils
 from nets import resnet_v1

@@ -36,15 +37,14 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
                depth_multiplier,
                min_depth,
                pad_to_multiple,
-               conv_hyperparams,
+               conv_hyperparams_fn,
                resnet_base_fn,
                resnet_scope_name,
                fpn_scope_name,
-               batch_norm_trainable=True,
                reuse_weights=None,
                use_explicit_padding=False,
                use_depthwise=False,
-               inplace_batchnorm_update=False):
+               override_base_feature_extractor_hyperparams=False):
     """SSD FPN feature extractor based on Resnet v1 architecture.

     Args:

@@ -54,32 +54,28 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
       min_depth: minimum feature extractor depth. UNUSED currently.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
       resnet_base_fn: base resnet network to use.
       resnet_scope_name: scope name under which to construct resnet.
       fpn_scope_name: scope name under which to construct the feature pyramid
         network.
-      batch_norm_trainable: Whether to update batch norm parameters during
-        training or not. When training with a small batch size (e.g. 1), it is
-        desirable to disable batch norm update and use pretrained batch norm
-        params.
       reuse_weights: Whether to reuse variables. Default is None.
       use_explicit_padding: Whether to use explicit padding when extracting
         features. Default is False. UNUSED currently.
       use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
-      inplace_batchnorm_update: Whether to update batch_norm inplace during
-        training. This is required for batch norm to work correctly on TPUs.
-        When this is false, user must add a control dependency on
-        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
-        norm moving average parameters.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.

     Raises:
       ValueError: On supplying invalid arguments for unused arguments.
     """
     super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
         is_training, depth_multiplier, min_depth, pad_to_multiple,
-        conv_hyperparams, batch_norm_trainable, reuse_weights,
-        use_explicit_padding, inplace_batchnorm_update)
+        conv_hyperparams_fn, reuse_weights, use_explicit_padding,
+        override_base_feature_extractor_hyperparams)
     if self._depth_multiplier != 1.0:
       raise ValueError('Only depth 1.0 is supported, found: {}'.format(
           self._depth_multiplier))

@@ -116,7 +112,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
       filtered_image_features[feature_name] = feature
     return filtered_image_features

-  def _extract_features(self, preprocessed_inputs):
+  def extract_features(self, preprocessed_inputs):
     """Extract features from preprocessed inputs.

     Args:

@@ -139,19 +135,22 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     with tf.variable_scope(
         self._resnet_scope_name, reuse=self._reuse_weights) as scope:
       with slim.arg_scope(resnet_v1.resnet_arg_scope()):
-        _, image_features = self._resnet_base_fn(
-            inputs=ops.pad_to_multiple(preprocessed_inputs,
-                                       self._pad_to_multiple),
-            num_classes=None,
-            is_training=self._is_training and self._batch_norm_trainable,
-            global_pool=False,
-            output_stride=None,
-            store_non_strided_activations=True,
-            scope=scope)
+        with (slim.arg_scope(self._conv_hyperparams_fn())
+              if self._override_base_feature_extractor_hyperparams else
+              context_manager.IdentityContextManager()):
+          _, image_features = self._resnet_base_fn(
+              inputs=ops.pad_to_multiple(preprocessed_inputs,
+                                         self._pad_to_multiple),
+              num_classes=None,
+              is_training=None,
+              global_pool=False,
+              output_stride=None,
+              store_non_strided_activations=True,
+              scope=scope)
       image_features = self._filter_features(image_features)
       last_feature_map = image_features['block4']
     with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
-      with slim.arg_scope(self._conv_hyperparams):
+      with slim.arg_scope(self._conv_hyperparams_fn()):
         for i in range(5, 7):
           last_feature_map = slim.conv2d(
               last_feature_map,
@@ -178,40 +177,36 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
                depth_multiplier,
                min_depth,
                pad_to_multiple,
-               conv_hyperparams,
-               batch_norm_trainable=True,
+               conv_hyperparams_fn,
                reuse_weights=None,
                use_explicit_padding=False,
                use_depthwise=False,
-               inplace_batchnorm_update=False):
-    """Resnet50 v1 FPN Feature Extractor for SSD Models.
+               override_base_feature_extractor_hyperparams=False):
+    """SSD Resnet50 V1 FPN feature extractor based on Resnet v1 architecture.

     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-      min_depth: minimum feature extractor depth.
-        UNUSED currently.
+      min_depth: minimum feature extractor depth. UNUSED currently.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
-      batch_norm_trainable: Whether to update batch norm parameters during
-        training or not. When training with a small batch size (e.g. 1), it is
-        desirable to disable batch norm update and use pretrained batch norm
-        params.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
       reuse_weights: Whether to reuse variables. Default is None.
       use_explicit_padding: Whether to use explicit padding when extracting
         features. Default is False. UNUSED currently.
       use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
-      inplace_batchnorm_update: Whether to update batch_norm inplace during
-        training. This is required for batch norm to work correctly on TPUs.
-        When this is false, user must add a control dependency on
-        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
-        norm moving average parameters.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
     """
     super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
         is_training, depth_multiplier, min_depth, pad_to_multiple,
-        conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
-        batch_norm_trainable, reuse_weights, use_explicit_padding,
-        inplace_batchnorm_update)
+        conv_hyperparams_fn, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
+        reuse_weights, use_explicit_padding,
+        override_base_feature_extractor_hyperparams)


 class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):

@@ -221,40 +216,36 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
                depth_multiplier,
                min_depth,
                pad_to_multiple,
-               conv_hyperparams,
-               batch_norm_trainable=True,
+               conv_hyperparams_fn,
                reuse_weights=None,
                use_explicit_padding=False,
                use_depthwise=False,
-               inplace_batchnorm_update=False):
-    """Resnet101 v1 FPN Feature Extractor for SSD Models.
+               override_base_feature_extractor_hyperparams=False):
+    """SSD Resnet101 V1 FPN feature extractor based on Resnet v1 architecture.

     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-      min_depth: minimum feature extractor depth.
-        UNUSED currently.
+      min_depth: minimum feature extractor depth. UNUSED currently.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
-      batch_norm_trainable: Whether to update batch norm parameters during
-        training or not. When training with a small batch size (e.g. 1), it is
-        desirable to disable batch norm update and use pretrained batch norm
-        params.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
      reuse_weights: Whether to reuse variables. Default is None.
       use_explicit_padding: Whether to use explicit padding when extracting
         features. Default is False. UNUSED currently.
       use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
-      inplace_batchnorm_update: Whether to update batch_norm inplace during
-        training. This is required for batch norm to work correctly on TPUs.
-        When this is false, user must add a control dependency on
-        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
-        norm moving average parameters.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
     """
     super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
         is_training, depth_multiplier, min_depth, pad_to_multiple,
-        conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
-        batch_norm_trainable, reuse_weights, use_explicit_padding,
-        inplace_batchnorm_update)
+        conv_hyperparams_fn, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
+        reuse_weights, use_explicit_padding,
+        override_base_feature_extractor_hyperparams)


 class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):

@@ -264,37 +255,33 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
                depth_multiplier,
                min_depth,
                pad_to_multiple,
-               conv_hyperparams,
-               batch_norm_trainable=True,
+               conv_hyperparams_fn,
                reuse_weights=None,
                use_explicit_padding=False,
                use_depthwise=False,
-               inplace_batchnorm_update=False):
-    """Resnet152 v1 FPN Feature Extractor for SSD Models.
+               override_base_feature_extractor_hyperparams=False):
+    """SSD Resnet152 V1 FPN feature extractor based on Resnet v1 architecture.

     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-      min_depth: minimum feature extractor depth.
-        UNUSED currently.
+      min_depth: minimum feature extractor depth. UNUSED currently.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
-      batch_norm_trainable: Whether to update batch norm parameters during
-        training or not. When training with a small batch size (e.g. 1), it is
-        desirable to disable batch norm update and use pretrained batch norm
-        params.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
       reuse_weights: Whether to reuse variables. Default is None.
       use_explicit_padding: Whether to use explicit padding when extracting
         features. Default is False. UNUSED currently.
       use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
-      inplace_batchnorm_update: Whether to update batch_norm inplace during
-        training. This is required for batch norm to work correctly on TPUs.
-        When this is false, user must add a control dependency on
-        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
-        norm moving average parameters.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
     """
     super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
         is_training, depth_multiplier, min_depth, pad_to_multiple,
-        conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
-        batch_norm_trainable, reuse_weights, use_explicit_padding,
-        inplace_batchnorm_update)
+        conv_hyperparams_fn, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
+        reuse_weights, use_explicit_padding,
+        override_base_feature_extractor_hyperparams)
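
The conditional `with` in extract_features selects between a real arg_scope and a no-op context manager. As a hedged illustration (not the actual object_detection.utils.context_manager source), an identity context manager can be as small as:

# Minimal sketch of an identity (no-op) context manager, as assumed from the
# `context_manager.IdentityContextManager()` call in the diff above.
class IdentityContextManager(object):
  """Context manager that does nothing; useful as an `else` branch."""

  def __enter__(self):
    return None

  def __exit__(self, exec_type, exec_value, traceback):
    # Returning False re-raises any exception raised inside the block.
    return False


def maybe_scoped(override, make_scope):
  # Pick the real scope or the no-op one with a single expression, mirroring
  # the pattern used in extract_features above.
  return make_scope() if override else IdentityContextManager()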
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_test.py

@@ -27,13 +27,10 @@ class SSDResnet50V1FeatureExtractorTest(
   def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                 use_explicit_padding=False):
     min_depth = 32
-    conv_hyperparams = {}
-    batch_norm_trainable = True
     is_training = True
     return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
         is_training, depth_multiplier, min_depth, pad_to_multiple,
-        conv_hyperparams, batch_norm_trainable,
+        self.conv_hyperparams_fn,
         use_explicit_padding=use_explicit_padding)

   def _resnet_scope_name(self):
     return 'resnet_v1_50'

@@ -47,13 +44,14 @@ class SSDResnet101V1FeatureExtractorTest(
   def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                 use_explicit_padding=False):
     min_depth = 32
-    conv_hyperparams = {}
-    batch_norm_trainable = True
     is_training = True
     return (ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor(
-        is_training, depth_multiplier, min_depth, pad_to_multiple,
-        conv_hyperparams, batch_norm_trainable,
+        is_training,
+        depth_multiplier,
+        min_depth,
+        pad_to_multiple,
+        self.conv_hyperparams_fn,
         use_explicit_padding=use_explicit_padding))

   def _resnet_scope_name(self):

@@ -68,13 +66,14 @@ class SSDResnet152V1FeatureExtractorTest(
   def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                 use_explicit_padding=False):
     min_depth = 32
-    conv_hyperparams = {}
-    batch_norm_trainable = True
     is_training = True
     return (ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor(
-        is_training, depth_multiplier, min_depth, pad_to_multiple,
-        conv_hyperparams, batch_norm_trainable,
+        is_training,
+        depth_multiplier,
+        min_depth,
+        pad_to_multiple,
+        self.conv_hyperparams_fn,
         use_explicit_padding=use_explicit_padding))

   def _resnet_scope_name(self):
research/object_detection/protos/box_predictor.proto

@@ -118,6 +118,7 @@ message MaskRCNNBoxPredictor {
   // The number of convolutions applied to image_features in the mask prediction
   // branch.
   optional int32 mask_prediction_num_conv_layers = 11 [default = 2];
+  optional bool masks_are_class_agnostic = 12 [default = false];
 }

 message RfcnBoxPredictor {
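
For illustration only, a sketch of how the new field could be exercised from Python; the predictor fragment below is a minimal made-up example, not a recommended configuration:

# Hypothetical sketch: parsing a box predictor config that sets the field
# added in the diff above.
from google.protobuf import text_format
from object_detection.protos import box_predictor_pb2

config = text_format.Parse(
    """
    mask_rcnn_box_predictor {
      mask_prediction_num_conv_layers: 2
      masks_are_class_agnostic: true
    }
    """, box_predictor_pb2.BoxPredictor())
assert config.mask_rcnn_box_predictor.masks_are_class_agnostic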
research/object_detection/protos/ssd.proto

@@ -60,6 +60,21 @@ message Ssd {
   // Loss configuration for training.
   optional Loss loss = 11;

+  // Whether to update batch norm parameters during training or not.
+  // When training with a relatively small batch size (e.g. 1), it is
+  // desirable to disable batch norm update and use pretrained batch norm
+  // params.
+  //
+  // Note: Some feature extractors are used with canned arg_scopes
+  // (e.g. resnet arg scopes). In these cases training behavior of batch norm
+  // variables may depend on both values of `batch_norm_trainable` and
+  // `is_training`.
+  //
+  // When canned arg_scopes are used with feature extractors `conv_hyperparams`
+  // will apply only to the additional layers that are added and are outside the
+  // canned arg_scope.
+  optional bool freeze_batchnorm = 16 [default = false];
+
   // Whether to update batch_norm inplace during training. This is required
   // for batch norm to work correctly on TPUs. When this is false, user must add
   // a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order

@@ -69,6 +84,8 @@ message Ssd {
 message SsdFeatureExtractor {
+  reserved 6;
+
   // Type of ssd feature extractor.
   optional string type = 1;

@@ -82,26 +99,19 @@ message SsdFeatureExtractor {
   // of the base feature extractor.
   optional Hyperparams conv_hyperparams = 4;

+  // Normally, SSD feature extractors are constructed by reusing an existing
+  // base feature extractor (that has its own hyperparams) and adding new layers
+  // on top of it. `conv_hyperparams` above normally applies only to the new
+  // layers while the base feature extractor uses its own default hyperparams. If
+  // this value is set to true, the base feature extractor's hyperparams will be
+  // overridden with the `conv_hyperparams`.
+  optional bool override_base_feature_extractor_hyperparams = 9 [default = false];
+
   // The nearest multiple to zero-pad the input height and width dimensions to.
   // For example, if pad_to_multiple = 2, input dimensions are zero-padded
   // until the resulting dimensions are even.
   optional int32 pad_to_multiple = 5 [default = 1];

-  // Whether to update batch norm parameters during training or not.
-  // When training with a relatively small batch size (e.g. 1), it is
-  // desirable to disable batch norm update and use pretrained batch norm
-  // params.
-  //
-  // Note: Some feature extractors are used with canned arg_scopes
-  // (e.g. resnet arg scopes). In these cases training behavior of batch norm
-  // variables may depend on both values of `batch_norm_trainable` and
-  // `is_training`.
-  //
-  // When canned arg_scopes are used with feature extractors `conv_hyperparams`
-  // will apply only to the additional layers that are added and are outside the
-  // canned arg_scope.
-  optional bool batch_norm_trainable = 6 [default = true];
-
   // Whether to use explicit padding when extracting SSD multiresolution
   // features. Note that this does not apply to the base feature extractor.
   optional bool use_explicit_padding = 7 [default = false];
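
A hedged sketch of how the relocated knobs look from Python once this proto change is in; the feature extractor type is an arbitrary example value and the surrounding model config is elided:

# Sketch under the assumption that ssd_pb2 is the generated module for the
# ssd.proto shown above; only fields from the diff are used.
from google.protobuf import text_format
from object_detection.protos import ssd_pb2

ssd_config = text_format.Parse(
    """
    freeze_batchnorm: false
    feature_extractor {
      type: 'ssd_inception_v2'
      override_base_feature_extractor_hyperparams: true
    }
    """, ssd_pb2.Ssd())
assert ssd_config.feature_extractor.override_base_feature_extractor_hyperparams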
research/object_detection/protos/train.proto

@@ -6,8 +6,11 @@ import "object_detection/protos/optimizer.proto";
 import "object_detection/protos/preprocessor.proto";

 // Message for configuring DetectionModel training jobs (train.py).
+// Next id: 25
 message TrainConfig {
-  // Input queue batch size.
+  // Effective batch size to use for training.
+  // For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be
+  // `batch_size` / number of cores (or `batch_size` / number of GPUs).
   optional uint32 batch_size = 1 [default = 32];

   // Data augmentation options.

@@ -78,6 +81,10 @@ message TrainConfig {
   // Note that only Sigmoid classification losses should be used.
   optional bool merge_multiple_label_boxes = 17 [default = false];

+  // If true, will use multiclass scores from object annotations as ground
+  // truth. Currently only compatible with annotated image inputs.
+  optional bool use_multiclass_scores = 24 [default = false];
+
   // Whether to add regularization loss to `total_loss`. This is true by
   // default and adds all regularization losses defined in the model to
   // `total_loss`.
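
A small worked example of the batch size semantics described in the new comment; the core count is an assumed value:

# The config value is the effective (global) batch size; each core or GPU
# receives an equal slice of it.
effective_batch_size = 32   # value of `batch_size` in TrainConfig
num_cores = 8               # e.g. one TPU with 8 cores, or 8 GPUs

per_core_batch_size = effective_batch_size // num_cores
assert per_core_batch_size == 4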
research/object_detection/samples/configs/ssd_inception_v2_coco.config

@@ -98,6 +98,7 @@ model {
         epsilon: 0.001,
       }
     }
+    override_base_feature_extractor_hyperparams: true
   }
   loss {
     classification_loss {

research/object_detection/samples/configs/ssd_inception_v2_pets.config

@@ -98,6 +98,7 @@ model {
         epsilon: 0.001,
       }
     }
+    override_base_feature_extractor_hyperparams: true
   }
   loss {
     classification_loss {

research/object_detection/samples/configs/ssd_inception_v3_pets.config

@@ -98,6 +98,7 @@ model {
         epsilon: 0.01,
       }
     }
+    override_base_feature_extractor_hyperparams: true
   }
   loss {
     classification_loss {
research/object_detection/trainer.py

@@ -69,10 +69,13 @@ def create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                             in tensor_dict)
   include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                        in tensor_dict)
+  include_multiclass_scores = (fields.InputDataFields.multiclass_scores
+                               in tensor_dict)
   if data_augmentation_options:
     tensor_dict = preprocessor.preprocess(
         tensor_dict, data_augmentation_options,
         func_arg_map=preprocessor.get_default_func_arg_map(
+            include_multiclass_scores=include_multiclass_scores,
             include_instance_masks=include_instance_masks,
             include_keypoints=include_keypoints))

@@ -85,7 +88,10 @@ def create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
   return input_queue


-def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False):
+def get_inputs(input_queue,
+               num_classes,
+               merge_multiple_label_boxes=False,
+               use_multiclass_scores=False):
   """Dequeues batch and constructs inputs to object detection model.

   Args:

@@ -95,6 +101,8 @@ def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False):
       or not. Defaults to false. Merged boxes are represented with a single
       box and a k-hot encoding of the multiple labels associated with the
       boxes.
+    use_multiclass_scores: Whether to use multiclass scores instead of
+      groundtruth_classes.

   Returns:
     images: a list of 3-D float tensor of images.

@@ -123,9 +131,19 @@ def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False):
     classes_gt = tf.cast(read_data[fields.InputDataFields.groundtruth_classes],
                          tf.int32)
     classes_gt -= label_id_offset
+
+    if merge_multiple_label_boxes and use_multiclass_scores:
+      raise ValueError(
+          'Using both merge_multiple_label_boxes and use_multiclass_scores is '
+          'not supported')
+
     if merge_multiple_label_boxes:
       location_gt, classes_gt, _ = util_ops.merge_boxes_with_multiple_labels(
           location_gt, classes_gt, num_classes)
+    elif use_multiclass_scores:
+      classes_gt = tf.cast(read_data[fields.InputDataFields.multiclass_scores],
+                           tf.float32)
     else:
       classes_gt = util_ops.padded_one_hot_encoding(
           indices=classes_gt, depth=num_classes, left_pad=0)

@@ -155,7 +173,8 @@ def _create_losses(input_queue, create_model_fn, train_config):
    groundtruth_masks_list, groundtruth_keypoints_list, _) = get_inputs(
        input_queue,
        detection_model.num_classes,
-       train_config.merge_multiple_label_boxes)
+       train_config.merge_multiple_label_boxes,
+       train_config.use_multiclass_scores)

   preprocessed_images = []
   true_image_shapes = []

@@ -183,9 +202,19 @@ def _create_losses(input_queue, create_model_fn, train_config):
       tf.losses.add_loss(loss_tensor)


-def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
-          num_clones, worker_replicas, clone_on_cpu, ps_tasks, worker_job_name,
-          is_chief, train_dir, graph_hook_fn=None):
+def train(create_tensor_dict_fn,
+          create_model_fn,
+          train_config,
+          master,
+          task,
+          num_clones,
+          worker_replicas,
+          clone_on_cpu,
+          ps_tasks,
+          worker_job_name,
+          is_chief,
+          train_dir,
+          graph_hook_fn=None):
   """Training function for detection models.

   Args:
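
A hedged numpy sketch contrasting the two target formats handled by get_inputs; the values are illustrative and not taken from the diff:

import numpy as np

num_classes = 3

# Default path: integer class ids become a (padded) one-hot encoding.
classes_gt = np.array([0, 2])                 # zero-indexed class ids
one_hot = np.eye(num_classes)[classes_gt]     # [[1, 0, 0], [0, 0, 1]]

# use_multiclass_scores path: the annotation already carries one float score
# per class for each box, and is used directly as the target.
multiclass_scores = np.array([[0.7, 0.2, 0.1],
                              [0.0, 0.4, 0.6]], dtype=np.float32)
assert multiclass_scores.shape == (2, num_classes)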
research/object_detection/trainer_test.py

@@ -37,12 +37,15 @@ def get_input_function():
       [1], minval=0, maxval=NUMBER_OF_CLASSES, dtype=tf.int32)
   box_label = tf.random_uniform(
       [1, 4], minval=0.4, maxval=0.6, dtype=tf.float32)
+  multiclass_scores = tf.random_uniform(
+      [1, NUMBER_OF_CLASSES], minval=0.4, maxval=0.6, dtype=tf.float32)

   return {
       fields.InputDataFields.image: image,
       fields.InputDataFields.key: key,
       fields.InputDataFields.groundtruth_classes: class_label,
-      fields.InputDataFields.groundtruth_boxes: box_label
+      fields.InputDataFields.groundtruth_boxes: box_label,
+      fields.InputDataFields.multiclass_scores: multiclass_scores
   }

@@ -203,6 +206,50 @@ class TrainerTest(tf.test.TestCase):
     train_dir = self.get_temp_dir()

     trainer.train(
         create_tensor_dict_fn=get_input_function,
         create_model_fn=FakeDetectionModel,
         train_config=train_config,
         master='',
         task=0,
         num_clones=1,
         worker_replicas=1,
         clone_on_cpu=True,
         ps_tasks=0,
         worker_job_name='worker',
         is_chief=True,
         train_dir=train_dir)

+  def test_configure_trainer_with_multiclass_scores_and_train_two_steps(self):
+    train_config_text_proto = """
+    optimizer {
+      adam_optimizer {
+        learning_rate {
+          constant_learning_rate {
+            learning_rate: 0.01
+          }
+        }
+      }
+    }
+    data_augmentation_options {
+      random_adjust_brightness {
+        max_delta: 0.2
+      }
+    }
+    data_augmentation_options {
+      random_adjust_contrast {
+        min_delta: 0.7
+        max_delta: 1.1
+      }
+    }
+    num_steps: 2
+    use_multiclass_scores: true
+    """
+    train_config = train_pb2.TrainConfig()
+    text_format.Merge(train_config_text_proto, train_config)
+
+    train_dir = self.get_temp_dir()
+
+    trainer.train(
+        create_tensor_dict_fn=get_input_function,
+        create_model_fn=FakeDetectionModel,
+        train_config=train_config,
research/object_detection/utils/config_util.py

@@ -63,8 +63,10 @@ def get_spatial_image_size(image_resizer_config):
     ValueError: If the model type is not recognized.
   """
   if image_resizer_config.HasField("fixed_shape_resizer"):
-    return [image_resizer_config.fixed_shape_resizer.height,
-            image_resizer_config.fixed_shape_resizer.width]
+    return [
+        image_resizer_config.fixed_shape_resizer.height,
+        image_resizer_config.fixed_shape_resizer.width
+    ]
   if image_resizer_config.HasField("keep_aspect_ratio_resizer"):
     if image_resizer_config.keep_aspect_ratio_resizer.pad_to_max_dimension:
       return [image_resizer_config.keep_aspect_ratio_resizer.max_dimension] * 2

@@ -74,7 +76,7 @@ def get_spatial_image_size(image_resizer_config):
 def get_configs_from_pipeline_file(pipeline_config_path):
-  """Reads configuration from a pipeline_pb2.TrainEvalPipelineConfig.
+  """Reads config from a file containing pipeline_pb2.TrainEvalPipelineConfig.

   Args:
     pipeline_config_path: Path to pipeline_pb2.TrainEvalPipelineConfig text

@@ -89,23 +91,34 @@ def get_configs_from_pipeline_file(pipeline_config_path):
   with tf.gfile.GFile(pipeline_config_path, "r") as f:
     proto_str = f.read()
     text_format.Merge(proto_str, pipeline_config)
+  return create_configs_from_pipeline_proto(pipeline_config)
+
+
+def create_configs_from_pipeline_proto(pipeline_config):
+  """Creates a configs dictionary from pipeline_pb2.TrainEvalPipelineConfig.
+
+  Args:
+    pipeline_config: pipeline_pb2.TrainEvalPipelineConfig proto object.
+
+  Returns:
+    Dictionary of configuration objects. Keys are `model`, `train_config`,
+      `train_input_config`, `eval_config`, `eval_input_config`. Values are the
+      corresponding config objects.
+  """
   configs = {}
   configs["model"] = pipeline_config.model
   configs["train_config"] = pipeline_config.train_config
   configs["train_input_config"] = pipeline_config.train_input_reader
   configs["eval_config"] = pipeline_config.eval_config
   configs["eval_input_config"] = pipeline_config.eval_input_reader
   return configs


 def create_pipeline_proto_from_configs(configs):
   """Creates a pipeline_pb2.TrainEvalPipelineConfig from configs dictionary.

-  This function nearly performs the inverse operation of
-  get_configs_from_pipeline_file(). Instead of returning a file path, it
-  returns a `TrainEvalPipelineConfig` object.
+  This function performs the inverse operation of
+  create_configs_from_pipeline_proto().

   Args:
     configs: Dictionary of configs. See get_configs_from_pipeline_file().

@@ -437,7 +450,7 @@ def _get_classification_loss(model_config):
   if meta_architecture == "faster_rcnn":
     model = model_config.faster_rcnn
     classification_loss = model.second_stage_classification_loss
-  if meta_architecture == "ssd":
+  elif meta_architecture == "ssd":
     model = model_config.ssd
     classification_loss = model.loss.classification_loss
   else:
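
A hedged usage sketch for the refactored helpers; the config path is a placeholder:

# Read a pipeline file into a configs dict, mutate it, then round-trip it.
# create_pipeline_proto_from_configs is the inverse of
# create_configs_from_pipeline_proto, per the docstrings above.
from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file('pipeline.config')
configs['train_config'].batch_size = 64

pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
same_configs = config_util.create_configs_from_pipeline_proto(pipeline_proto)
assert same_configs['train_config'].batch_size == 64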
research/object_detection/utils/config_util_test.py

@@ -93,6 +93,26 @@ class ConfigUtilTest(tf.test.TestCase):
     self.assertProtoEquals(pipeline_config.eval_input_reader,
                            configs["eval_input_config"])

+  def test_create_configs_from_pipeline_proto(self):
+    """Tests creating configs dictionary from pipeline proto."""
+    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+    pipeline_config.model.faster_rcnn.num_classes = 10
+    pipeline_config.train_config.batch_size = 32
+    pipeline_config.train_input_reader.label_map_path = "path/to/label_map"
+    pipeline_config.eval_config.num_examples = 20
+    pipeline_config.eval_input_reader.queue_capacity = 100
+
+    configs = config_util.create_configs_from_pipeline_proto(pipeline_config)
+    self.assertProtoEquals(pipeline_config.model, configs["model"])
+    self.assertProtoEquals(pipeline_config.train_config,
+                           configs["train_config"])
+    self.assertProtoEquals(pipeline_config.train_input_reader,
+                           configs["train_input_config"])
+    self.assertProtoEquals(pipeline_config.eval_config, configs["eval_config"])
+    self.assertProtoEquals(pipeline_config.eval_input_reader,
+                           configs["eval_input_config"])
+
   def test_create_pipeline_proto_from_configs(self):
     """Tests that proto can be reconstructed from configs dictionary."""
     pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
research/object_detection/utils/label_map_util.py

@@ -34,7 +34,8 @@ def _validate_label_map(label_map):
   for item in label_map.item:
     if item.id < 0:
       raise ValueError('Label map ids should be >= 0.')
-    if item.id == 0 and item.name != 'background':
+    if (item.id == 0 and item.name != 'background' and
+        item.display_name != 'background'):
       raise ValueError('Label map id 0 is reserved for the background label')
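
A sketch of a label map the relaxed check now accepts, assuming the standard StringIntLabelMap proto with a display_name field; entry values are made up:

# Id 0 is now allowed when either `name` or `display_name` is 'background'.
from google.protobuf import text_format
from object_detection.protos import string_int_label_map_pb2

label_map = text_format.Parse(
    """
    item {
      id: 0
      name: 'none_of_the_above'
      display_name: 'background'
    }
    item {
      id: 1
      name: 'cat'
    }
    """, string_int_label_map_pb2.StringIntLabelMap())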
research/object_detection/utils/metrics.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """Functions for computing metrics like precision, recall, CorLoc, etc."""
 from __future__ import division

@@ -24,7 +23,7 @@ def compute_precision_recall(scores, labels, num_gt):
   Args:
     scores: A float numpy array representing detection score
-    labels: A boolean numpy array representing true/false positive labels
+    labels: A float numpy array representing weighted true/false positive labels
     num_gt: Number of ground truth instances

   Raises:

@@ -37,12 +36,13 @@ def compute_precision_recall(scores, labels, num_gt):
     This value is None if no ground truth labels are present.

   """
-  if not isinstance(labels, np.ndarray) or labels.dtype != np.bool or (
-      len(labels.shape) != 1):
-    raise ValueError("labels must be single dimension bool numpy array")
+  if not isinstance(labels, np.ndarray) or len(labels.shape) != 1:
+    raise ValueError("labels must be single dimension numpy array")
+
+  if labels.dtype != np.float and labels.dtype != np.bool:
+    raise ValueError("labels type must be either bool or float")

   if not isinstance(scores, np.ndarray) or len(scores.shape) != 1:
     raise ValueError("scores must be single dimension numpy array")

   if num_gt < np.sum(labels):

@@ -56,9 +56,8 @@ def compute_precision_recall(scores, labels, num_gt):
   sorted_indices = np.argsort(scores)
   sorted_indices = sorted_indices[::-1]
-  labels = labels.astype(int)
   true_positive_labels = labels[sorted_indices]
-  false_positive_labels = 1 - true_positive_labels
+  false_positive_labels = (true_positive_labels <= 0).astype(float)
   cum_true_positives = np.cumsum(true_positive_labels)
   cum_false_positives = np.cumsum(false_positive_labels)
   precision = cum_true_positives.astype(float) / (

@@ -90,8 +89,8 @@ def compute_average_precision(precision, recall):
       raise ValueError("If precision is None, recall must also be None")
     return np.NAN

-  if not isinstance(precision, np.ndarray) or not isinstance(recall,
-                                                             np.ndarray):
+  if not isinstance(precision, np.ndarray) or not isinstance(
+      recall, np.ndarray):
     raise ValueError("precision and recall must be numpy array")
   if precision.dtype != np.float or recall.dtype != np.float:
     raise ValueError("input must be float numpy array.")

@@ -139,6 +138,53 @@ def compute_cor_loc(num_gt_imgs_per_class,
       class
   """
-  return np.where(num_gt_imgs_per_class == 0, np.nan,
-                  num_images_correctly_detected_per_class /
-                  num_gt_imgs_per_class)
+  return np.where(
+      num_gt_imgs_per_class == 0, np.nan,
+      num_images_correctly_detected_per_class / num_gt_imgs_per_class)
+
+
+def compute_median_rank_at_k(tp_fp_list, k):
+  """Computes MedianRank@k, where k is the top-scoring labels.
+
+  Args:
+    tp_fp_list: a list of numpy arrays; each numpy array corresponds to all
+        detections on a single image, where the detections are sorted by score
+        in descending order. Further, each numpy array element can have boolean
+        or float values. True positive elements have either value >0.0 or True;
+        any other value is considered false positive.
+    k: number of top-scoring proposals to take.
+
+  Returns:
+    median_rank: median rank of all true positive proposals among top k by
+      score.
+  """
+  ranks = []
+  for i in range(len(tp_fp_list)):
+    ranks.append(
+        np.where(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])] > 0)[0])
+  concatenated_ranks = np.concatenate(ranks)
+  return np.median(concatenated_ranks)
+
+
+def compute_recall_at_k(tp_fp_list, num_gt, k):
+  """Computes Recall@k, where k is the top-scoring labels.
+
+  Args:
+    tp_fp_list: a list of numpy arrays; each numpy array corresponds to all
+        detections on a single image, where the detections are sorted by score
+        in descending order. Further, each numpy array element can have boolean
+        or float values. True positive elements have either value >0.0 or True;
+        any other value is considered false positive.
+    num_gt: number of groundtruth annotations.
+    k: number of top-scoring proposals to take.
+
+  Returns:
+    recall: recall evaluated on the top k by score detections.
+  """
+  tp_fp_eval = []
+  for i in range(len(tp_fp_list)):
+    tp_fp_eval.append(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])])
+  tp_fp_eval = np.concatenate(tp_fp_eval)
+  return np.sum(tp_fp_eval) / num_gt
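
A worked example of the two new metrics, reusing the numbers from the tests below:

# Three images with detections sorted by score, and 4 groundtruth boxes.
import numpy as np
from object_detection.utils import metrics

tp_fp = [np.array([1, 0, 0], dtype=float),        # image 1: TP at rank 0
         np.array([0, 1], dtype=float),           # image 2: TP at rank 1
         np.array([0, 0, 0, 0, 0], dtype=float)]  # image 3: no TPs

# The top-3 slices contain 2 true positives out of num_gt=4 -> recall 0.5.
assert metrics.compute_recall_at_k(tp_fp, num_gt=4, k=3) == 0.5
# The two true positives sit at ranks 0 and 1 -> median rank 0.5.
assert metrics.compute_median_rank_at_k(tp_fp, k=3) == 0.5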
research/object_detection/utils/metrics_test.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """Tests for object_detection.metrics."""
 import numpy as np

@@ -25,8 +24,8 @@ class MetricsTest(tf.test.TestCase):
   def test_compute_cor_loc(self):
     num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int)
-    num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0],
-                                                       dtype=int)
+    num_images_correctly_detected_per_class = np.array(
+        [10, 0, 1, 0, 0], dtype=int)
     corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
                                      num_images_correctly_detected_per_class)
     expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float)

@@ -34,8 +33,8 @@ class MetricsTest(tf.test.TestCase):
   def test_compute_cor_loc_nans(self):
     num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int)
-    num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0],
-                                                       dtype=int)
+    num_images_correctly_detected_per_class = np.array(
+        [10, 0, 1, 0, 0], dtype=int)
     corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
                                      num_images_correctly_detected_per_class)
     expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float)

@@ -45,18 +44,37 @@ class MetricsTest(tf.test.TestCase):
     num_gt = 10
     scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
     labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
+    labels_float_type = np.array([0, 1, 1, 0, 0, 1], dtype=float)
     accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float)
     expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6])
     expected_recall = accumulated_tp_count / num_gt
+
     precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
+    precision_float_type, recall_float_type = metrics.compute_precision_recall(
+        scores, labels_float_type, num_gt)
+
     self.assertAllClose(precision, expected_precision)
     self.assertAllClose(recall, expected_recall)
+    self.assertAllClose(precision_float_type, expected_precision)
+    self.assertAllClose(recall_float_type, expected_recall)
+
+  def test_compute_precision_recall_float(self):
+    num_gt = 10
+    scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
+    labels_float = np.array([0, 1, 1, 0.5, 0, 1], dtype=float)
+    expected_precision = np.array(
+        [0., 0.5, 0.33333333, 0.5, 0.55555556, 0.63636364], dtype=float)
+    expected_recall = np.array([0., 0.1, 0.1, 0.2, 0.25, 0.35], dtype=float)
+    precision, recall = metrics.compute_precision_recall(
+        scores, labels_float, num_gt)
+    self.assertAllClose(precision, expected_precision)
+    self.assertAllClose(recall, expected_recall)

   def test_compute_average_precision(self):
     precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0], dtype=float)
     recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5], dtype=float)
-    processed_precision = np.array([0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0],
-                                   dtype=float)
+    processed_precision = np.array(
+        [0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0], dtype=float)
     recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0], dtype=float)
     expected_mean_ap = np.sum(recall_interval * processed_precision)
     mean_ap = metrics.compute_average_precision(precision, recall)

@@ -74,6 +92,52 @@ class MetricsTest(tf.test.TestCase):
     ap = metrics.compute_average_precision(precision, recall)
     self.assertTrue(np.isnan(ap))

+  def test_compute_recall_at_k(self):
+    num_gt = 4
+    tp_fp = [
+        np.array([1, 0, 0], dtype=float),
+        np.array([0, 1], dtype=float),
+        np.array([0, 0, 0, 0, 0], dtype=float)
+    ]
+    tp_fp_bool = [
+        np.array([True, False, False], dtype=bool),
+        np.array([False, True], dtype=float),
+        np.array([False, False, False, False, False], dtype=float)
+    ]
+
+    recall_1 = metrics.compute_recall_at_k(tp_fp, num_gt, 1)
+    recall_3 = metrics.compute_recall_at_k(tp_fp, num_gt, 3)
+    recall_5 = metrics.compute_recall_at_k(tp_fp, num_gt, 5)
+    recall_3_bool = metrics.compute_recall_at_k(tp_fp_bool, num_gt, 3)
+
+    self.assertAlmostEqual(recall_1, 0.25)
+    self.assertAlmostEqual(recall_3, 0.5)
+    self.assertAlmostEqual(recall_3_bool, 0.5)
+    self.assertAlmostEqual(recall_5, 0.5)
+
+  def test_compute_median_rank_at_k(self):
+    tp_fp = [
+        np.array([1, 0, 0], dtype=float),
+        np.array([0, 0.1], dtype=float),
+        np.array([0, 0, 0, 0, 0], dtype=float)
+    ]
+    tp_fp_bool = [
+        np.array([True, False, False], dtype=bool),
+        np.array([False, True], dtype=float),
+        np.array([False, False, False, False, False], dtype=float)
+    ]
+
+    median_ranks_1 = metrics.compute_median_rank_at_k(tp_fp, 1)
+    median_ranks_3 = metrics.compute_median_rank_at_k(tp_fp, 3)
+    median_ranks_5 = metrics.compute_median_rank_at_k(tp_fp, 5)
+    median_ranks_3_bool = metrics.compute_median_rank_at_k(tp_fp_bool, 3)
+
+    self.assertEquals(median_ranks_1, 0)
+    self.assertEquals(median_ranks_3, 0.5)
+    self.assertEquals(median_ranks_3_bool, 0.5)
+    self.assertEquals(median_ranks_5, 0.5)
+

 if __name__ == '__main__':
   tf.test.main()
research/object_detection/utils/object_detection_evaluation.py

@@ -110,7 +110,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
                evaluate_corlocs=False,
                metric_prefix=None,
                use_weighted_mean_ap=False,
-               evaluate_masks=False):
+               evaluate_masks=False,
+               group_of_weight=0.0):
     """Constructor.

     Args:

@@ -128,6 +129,12 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
         of all classes.
       evaluate_masks: If False, evaluation will be performed based on boxes.
         If True, mask evaluation will be performed instead.
+      group_of_weight: Weight of group-of boxes. If set to 0, detections of the
+        correct class within a group-of box are ignored. If weight is > 0, then
+        if at least one detection falls within a group-of box with
+        matching_iou_threshold, weight group_of_weight is added to true
+        positives. Consequently, if no detection falls within a group-of box,
+        weight group_of_weight is added to false negatives.

     Raises:
       ValueError: If the category ids are not 1-indexed.

@@ -140,11 +147,13 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
     self._use_weighted_mean_ap = use_weighted_mean_ap
     self._label_id_offset = 1
     self._evaluate_masks = evaluate_masks
+    self._group_of_weight = group_of_weight
     self._evaluation = ObjectDetectionEvaluation(
         num_groundtruth_classes=self._num_classes,
         matching_iou_threshold=self._matching_iou_threshold,
         use_weighted_mean_ap=self._use_weighted_mean_ap,
-        label_id_offset=self._label_id_offset)
+        label_id_offset=self._label_id_offset,
+        group_of_weight=self._group_of_weight)
     self._image_ids = set([])
     self._evaluate_corlocs = evaluate_corlocs
     self._metric_prefix = (metric_prefix + '_') if metric_prefix else ''

@@ -383,7 +392,9 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
   def __init__(self,
                categories,
                matching_iou_threshold=0.5,
-               evaluate_corlocs=False):
+               evaluate_corlocs=False,
+               metric_prefix='OpenImagesV2',
+               group_of_weight=0.0):
     """Constructor.

     Args:

@@ -393,12 +404,21 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
       matching_iou_threshold: IOU threshold to use for matching groundtruth
         boxes to detection boxes.
       evaluate_corlocs: if True, additionally evaluates and returns CorLoc.
+      metric_prefix: Prefix name of the metric.
+      group_of_weight: Weight of the group-of bounding box. If set to 0
+        (default for Open Images V2 detection protocol), detections of the
+        correct class within a group-of box are ignored. If weight is > 0,
+        then if at least one detection falls within a group-of box with
+        matching_iou_threshold, weight group_of_weight is added to true
+        positives. Consequently, if no detection falls within a group-of box,
+        weight group_of_weight is added to false negatives.
     """
     super(OpenImagesDetectionEvaluator, self).__init__(
         categories,
         matching_iou_threshold,
         evaluate_corlocs,
-        metric_prefix='OpenImagesV2')
+        metric_prefix=metric_prefix,
+        group_of_weight=group_of_weight)

   def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
     """Adds groundtruth for a single image to be used for evaluation.

@@ -449,6 +469,130 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
     self._image_ids.update([image_id])


+class OpenImagesDetectionChallengeEvaluator(OpenImagesDetectionEvaluator):
+  """A class that implements Open Images Challenge Detection metrics.
+
+  Open Images Challenge Detection metric has two major changes in comparison
+  with Open Images V2 detection metric:
+  - a custom weight might be specified for detecting an object contained in
+    a group-of box.
+  - verified image-level labels should be explicitly provided for
+    evaluation: in case an image has neither positive nor negative image-level
+    label of class c, all detections of this class on this image will be
+    ignored.
+  """
+
+  def __init__(self,
+               categories,
+               matching_iou_threshold=0.5,
+               evaluate_corlocs=False,
+               group_of_weight=1.0):
+    """Constructor.
+
+    Args:
+      categories: A list of dicts, each of which has the following keys -
+        'id': (required) an integer id uniquely identifying this category.
+        'name': (required) string representing category name e.g., 'cat', 'dog'.
+      matching_iou_threshold: IOU threshold to use for matching groundtruth
+        boxes to detection boxes.
+      evaluate_corlocs: if True, additionally evaluates and returns CorLoc.
+      group_of_weight: weight of a group-of box. If set to 0, detections of the
+        correct class within a group-of box are ignored. If weight is > 0
+        (default for Open Images Detection Challenge 2018), then if at least one
+        detection falls within a group-of box with matching_iou_threshold,
+        weight group_of_weight is added to true positives. Consequently, if no
+        detection falls within a group-of box, weight group_of_weight is added
+        to false negatives.
+    """
+    super(OpenImagesDetectionChallengeEvaluator, self).__init__(
+        categories,
+        matching_iou_threshold,
+        evaluate_corlocs,
+        metric_prefix='OpenImagesChallenge2018',
+        group_of_weight=group_of_weight)
+    self._evaluatable_labels = {}
+
+  def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
+    """Adds groundtruth for a single image to be used for evaluation.
+
+    Args:
+      image_id: A unique string/integer identifier for the image.
+      groundtruth_dict: A dictionary containing -
+        standard_fields.InputDataFields.groundtruth_boxes: float32 numpy array
+          of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of
+          the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
+        standard_fields.InputDataFields.groundtruth_classes: integer numpy array
+          of shape [num_boxes] containing 1-indexed groundtruth classes for the
+          boxes.
+        standard_fields.InputDataFields.verified_labels: integer 1D numpy array
+          containing all classes for which labels are verified.
+        standard_fields.InputDataFields.groundtruth_group_of: Optional length
+          M numpy boolean array denoting whether a groundtruth box contains a
+          group of instances.
+
+    Raises:
+      ValueError: On adding groundtruth for an image more than once.
+    """
+    super(OpenImagesDetectionChallengeEvaluator,
+          self).add_single_ground_truth_image_info(image_id, groundtruth_dict)
+    groundtruth_classes = (
+        groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] -
+        self._label_id_offset)
+    self._evaluatable_labels[image_id] = np.unique(
+        np.concatenate(((groundtruth_dict.get(
+            standard_fields.InputDataFields.verified_labels,
+            np.array([], dtype=int)) - self._label_id_offset),
+                        groundtruth_classes)))
+
+  def add_single_detected_image_info(self, image_id, detections_dict):
+    """Adds detections for a single image to be used for evaluation.
+
+    Args:
+      image_id: A unique string/integer identifier for the image.
+      detections_dict: A dictionary containing -
+        standard_fields.DetectionResultFields.detection_boxes: float32 numpy
+          array of shape [num_boxes, 4] containing `num_boxes` detection boxes
+          of the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
+        standard_fields.DetectionResultFields.detection_scores: float32 numpy
+          array of shape [num_boxes] containing detection scores for the boxes.
+        standard_fields.DetectionResultFields.detection_classes: integer numpy
+          array of shape [num_boxes] containing 1-indexed detection classes for
+          the boxes.
+
+    Raises:
+      ValueError: If detection masks are not in detections dictionary.
+    """
+    if image_id not in self._image_ids:
+      # Since for the correct work of evaluator it is assumed that groundtruth
+      # is inserted first, we make sure to break the code if it is not the case.
+      self._image_ids.update([image_id])
+      self._evaluatable_labels[image_id] = np.array([])
+
+    detection_classes = (
+        detections_dict[standard_fields.DetectionResultFields.detection_classes]
+        - self._label_id_offset)
+    allowed_classes = np.where(
+        np.isin(detection_classes, self._evaluatable_labels[image_id]))
+    detection_classes = detection_classes[allowed_classes]
+    detected_boxes = detections_dict[
+        standard_fields.DetectionResultFields.detection_boxes][allowed_classes]
+    detected_scores = detections_dict[
+        standard_fields.DetectionResultFields.detection_scores][allowed_classes]
+
+    self._evaluation.add_single_detected_image_info(
+        image_key=image_id,
+        detected_boxes=detected_boxes,
+        detected_scores=detected_scores,
+        detected_class_labels=detection_classes)
+
+  def clear(self):
+    """Clears stored data."""
+    super(OpenImagesDetectionChallengeEvaluator, self).clear()
+    self._evaluatable_labels.clear()
+
+
 ObjectDetectionEvalMetrics = collections.namedtuple(
     'ObjectDetectionEvalMetrics', [
         'average_precisions', 'mean_ap', 'precisions', 'recalls', 'corlocs',

@@ -465,7 +609,8 @@ class ObjectDetectionEvaluation(object):
                nms_iou_threshold=1.0,
                nms_max_output_boxes=10000,
                use_weighted_mean_ap=False,
-               label_id_offset=0):
+               label_id_offset=0,
+               group_of_weight=0.0):
     if num_groundtruth_classes < 1:
       raise ValueError('Need at least 1 groundtruth class for evaluation.')

@@ -473,7 +618,9 @@ class ObjectDetectionEvaluation(object):
         num_groundtruth_classes=num_groundtruth_classes,
         matching_iou_threshold=matching_iou_threshold,
         nms_iou_threshold=nms_iou_threshold,
-        nms_max_output_boxes=nms_max_output_boxes)
+        nms_max_output_boxes=nms_max_output_boxes,
+        group_of_weight=group_of_weight)
+    self.group_of_weight = group_of_weight
     self.num_class = num_groundtruth_classes
     self.use_weighted_mean_ap = use_weighted_mean_ap
     self.label_id_offset = label_id_offset

@@ -483,7 +630,7 @@ class ObjectDetectionEvaluation(object):
     self.groundtruth_masks = {}
     self.groundtruth_is_difficult_list = {}
     self.groundtruth_is_group_of_list = {}
-    self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
+    self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=float)
     self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)

     self._initialize_detections()

@@ -650,7 +797,10 @@ class ObjectDetectionEvaluation(object):
     num_gt_instances = np.sum(groundtruth_class_labels[
         ~groundtruth_is_difficult_list
        & ~groundtruth_is_group_of_list] == class_index)
-    self.num_gt_instances_per_class[class_index] += num_gt_instances
+    num_groupof_gt_instances = self.group_of_weight * np.sum(
+        groundtruth_class_labels[groundtruth_is_group_of_list] == class_index)
+    self.num_gt_instances_per_class[
+        class_index] += num_gt_instances + num_groupof_gt_instances
     if np.any(groundtruth_class_labels == class_index):
       self.num_gt_imgs_per_class[class_index] += 1

@@ -677,13 +827,12 @@ class ObjectDetectionEvaluation(object):
     if self.use_weighted_mean_ap:
       all_scores = np.array([], dtype=float)
       all_tp_fp_labels = np.array([], dtype=bool)
     for class_index in range(self.num_class):
       if self.num_gt_instances_per_class[class_index] == 0:
         continue
       if not self.scores_per_class[class_index]:
         scores = np.array([], dtype=float)
-        tp_fp_labels = np.array([], dtype=bool)
+        tp_fp_labels = np.array([], dtype=float)
       else:
         scores = np.concatenate(self.scores_per_class[class_index])
         tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index])
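
A small worked example of the group-of accounting introduced above:

# With group_of_weight = 0.5, each group-of box of a class contributes 0.5
# (rather than 0 or 1) to that class's groundtruth instance count, which is
# why num_gt_instances_per_class changed from dtype=int to dtype=float.
group_of_weight = 0.5
num_ordinary_gt = 2      # non-difficult, non-group-of boxes of the class
num_group_of_gt = 1      # group-of boxes of the class

num_gt_instances = num_ordinary_gt + group_of_weight * num_group_of_gt
assert num_gt_instances == 2.5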
research/object_detection/utils/object_detection_evaluation_test.py
View file @
ed4e22b8
...
...
@@ -100,6 +100,126 @@ class OpenImagesV2EvaluationTest(tf.test.TestCase):
self
.
assertFalse
(
oiv2_evaluator
.
_image_ids
)
class OpenImagesDetectionChallengeEvaluatorTest(tf.test.TestCase):

  def test_returns_correct_metric_values(self):
    categories = [{'id': 1, 'name': 'cat'},
                  {'id': 2, 'name': 'dog'},
                  {'id': 3, 'name': 'elephant'}]
    oivchallenge_evaluator = (
        object_detection_evaluation.OpenImagesDetectionChallengeEvaluator(
            categories, group_of_weight=0.5))

    image_key = 'img1'
    groundtruth_boxes = np.array(
        [[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], dtype=float)
    groundtruth_class_labels = np.array([1, 3, 1], dtype=int)
    groundtruth_is_group_of_list = np.array([False, False, True], dtype=bool)
    groundtruth_verified_labels = np.array([1, 2, 3], dtype=int)
    oivchallenge_evaluator.add_single_ground_truth_image_info(
        image_key, {
            standard_fields.InputDataFields.groundtruth_boxes:
                groundtruth_boxes,
            standard_fields.InputDataFields.groundtruth_classes:
                groundtruth_class_labels,
            standard_fields.InputDataFields.groundtruth_group_of:
                groundtruth_is_group_of_list,
            standard_fields.InputDataFields.verified_labels:
                groundtruth_verified_labels,
        })
    image_key = 'img2'
    groundtruth_boxes = np.array(
        [[10, 10, 11, 11], [500, 500, 510, 510], [10, 10, 12, 12]],
        dtype=float)
    groundtruth_class_labels = np.array([1, 1, 3], dtype=int)
    groundtruth_is_group_of_list = np.array([False, False, True], dtype=bool)
    oivchallenge_evaluator.add_single_ground_truth_image_info(
        image_key, {
            standard_fields.InputDataFields.groundtruth_boxes:
                groundtruth_boxes,
            standard_fields.InputDataFields.groundtruth_classes:
                groundtruth_class_labels,
            standard_fields.InputDataFields.groundtruth_group_of:
                groundtruth_is_group_of_list
        })
    image_key = 'img3'
    groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
    groundtruth_class_labels = np.array([2], dtype=int)
    oivchallenge_evaluator.add_single_ground_truth_image_info(
        image_key, {
            standard_fields.InputDataFields.groundtruth_boxes:
                groundtruth_boxes,
            standard_fields.InputDataFields.groundtruth_classes:
                groundtruth_class_labels
        })
    image_key = 'img1'
    detected_boxes = np.array(
        [[10, 10, 11, 11], [100, 100, 120, 120]], dtype=float)
    detected_class_labels = np.array([2, 2], dtype=int)
    detected_scores = np.array([0.7, 0.8], dtype=float)
    oivchallenge_evaluator.add_single_detected_image_info(
        image_key, {
            standard_fields.DetectionResultFields.detection_boxes:
                detected_boxes,
            standard_fields.DetectionResultFields.detection_scores:
                detected_scores,
            standard_fields.DetectionResultFields.detection_classes:
                detected_class_labels
        })
    image_key = 'img2'
    detected_boxes = np.array(
        [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220],
         [10, 10, 11, 11]], dtype=float)
    detected_class_labels = np.array([1, 1, 2, 3], dtype=int)
    detected_scores = np.array([0.7, 0.8, 0.5, 0.9], dtype=float)
    oivchallenge_evaluator.add_single_detected_image_info(
        image_key, {
            standard_fields.DetectionResultFields.detection_boxes:
                detected_boxes,
            standard_fields.DetectionResultFields.detection_scores:
                detected_scores,
            standard_fields.DetectionResultFields.detection_classes:
                detected_class_labels
        })
    image_key = 'img3'
    detected_boxes = np.array([[0, 0, 1, 1]], dtype=float)
    detected_class_labels = np.array([2], dtype=int)
    detected_scores = np.array([0.5], dtype=float)
    oivchallenge_evaluator.add_single_detected_image_info(
        image_key, {
            standard_fields.DetectionResultFields.detection_boxes:
                detected_boxes,
            standard_fields.DetectionResultFields.detection_scores:
                detected_scores,
            standard_fields.DetectionResultFields.detection_classes:
                detected_class_labels
        })
    metrics = oivchallenge_evaluator.evaluate()

    self.assertAlmostEqual(
        metrics['OpenImagesChallenge2018_PerformanceByCategory/AP@0.5IOU/dog'],
        0.3333333333)
    self.assertAlmostEqual(
        metrics[
            'OpenImagesChallenge2018_PerformanceByCategory/AP@0.5IOU/elephant'],
        0.333333333333)
    self.assertAlmostEqual(
        metrics['OpenImagesChallenge2018_PerformanceByCategory/AP@0.5IOU/cat'],
        0.142857142857)
    self.assertAlmostEqual(
        metrics['OpenImagesChallenge2018_Precision/mAP@0.5IOU'], 0.269841269)
    oivchallenge_evaluator.clear()
    self.assertFalse(oivchallenge_evaluator._image_ids)


class PascalEvaluationTest(tf.test.TestCase):

  def test_returns_correct_metric_values_on_boxes(self):
...
...
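As a sanity check on the dog AP of 1/3 asserted above: on img1, 'dog' is a verified label with no dog boxes, so both class-2 detections (0.8, 0.7) are false positives; img2 never lists 'dog' among its labels, so its class-2 detection is apparently dropped from scoring; img3's detection (0.5) exactly matches the single dog box. A rough recomputation, skipping the precision interpolation the real metric applies (which changes nothing here) and valid because every dog label is a plain 0/1:

import numpy as np

tp_fp = np.array([0., 0., 1.])  # dog detections sorted by score: 0.8, 0.7, 0.5
num_gt = 1.0                    # the single dog box on img3
cum_tp = np.cumsum(tp_fp)
precision = cum_tp / np.arange(1, len(tp_fp) + 1)  # [0.    0.    0.333]
recall = cum_tp / num_gt                           # [0.    0.    1.   ]
deltas = np.diff(np.concatenate(([0.], recall)))
print(np.sum(deltas * precision))                  # 0.3333..., as asserted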
research/object_detection/utils/per_image_evaluation.py
...
...
@@ -35,7 +35,8 @@ class PerImageEvaluation(object):
               num_groundtruth_classes,
               matching_iou_threshold=0.5,
               nms_iou_threshold=0.3,
-               nms_max_output_boxes=50):
+               nms_max_output_boxes=50,
+               group_of_weight=0.0):
    """Initializes PerImageEvaluation with evaluation parameters.

    Args:
...
...
@@ -44,24 +45,26 @@ class PerImageEvaluation(object):
        the threshold to consider whether a detection is true positive or not
      nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
      nms_max_output_boxes: Number of maximum output boxes in NMS.
+      group_of_weight: Weight of the group-of boxes.
    """
    self.matching_iou_threshold = matching_iou_threshold
    self.nms_iou_threshold = nms_iou_threshold
    self.nms_max_output_boxes = nms_max_output_boxes
    self.num_groundtruth_classes = num_groundtruth_classes
+    self.group_of_weight = group_of_weight

  def compute_object_detection_metrics(
      self, detected_boxes, detected_scores, detected_class_labels,
      groundtruth_boxes, groundtruth_class_labels,
      groundtruth_is_difficult_list, groundtruth_is_group_of_list,
      detected_masks=None, groundtruth_masks=None):
-    """Evaluates detections as being tp, fp or ignored from a single image.
+    """Evaluates detections as being tp, fp or weighted from a single image.

    The evaluation is done in two stages:
     1. All detections are matched to non group-of boxes; true positives are
        determined and detections matched to difficult boxes are ignored.
     2. Detections that are determined as false positives are matched against
-        group-of boxes and ignored if matched.
+        group-of boxes and weighted if matched.

    Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
...
...
@@ -339,7 +342,8 @@ class PerImageEvaluation(object):
          box_data=groundtruth_boxes[groundtruth_is_group_of_list],
          mask_data=groundtruth_masks[groundtruth_is_group_of_list])
      iou = np_box_mask_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist)
-      ioa = np_box_mask_list_ops.ioa(gt_group_of_boxlist, detected_boxlist)
+      ioa = np.transpose(
+          np_box_mask_list_ops.ioa(gt_group_of_boxlist, detected_boxlist))
      scores = detected_boxlist.get_field('scores')
      num_boxes = detected_boxlist.num_boxes()
    return iou, ioa, scores, num_boxes
...
...
@@ -380,7 +384,8 @@ class PerImageEvaluation(object):
      gt_group_of_boxlist = np_box_list.BoxList(
          groundtruth_boxes[groundtruth_is_group_of_list])
      iou = np_box_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist)
-      ioa = np_box_list_ops.ioa(gt_group_of_boxlist, detected_boxlist)
+      ioa = np.transpose(
+          np_box_list_ops.ioa(gt_group_of_boxlist, detected_boxlist))
      scores = detected_boxlist.get_field('scores')
      num_boxes = detected_boxlist.num_boxes()
    return iou, ioa, scores, num_boxes
...
...
@@ -455,7 +460,8 @@ class PerImageEvaluation(object):
    # 1. All detections are matched to non group-of boxes; true positives are
    #    determined and detections matched to difficult boxes are ignored.
    # 2. Detections that are determined as false positives are matched against
-    #    group-of boxes and ignored if matched.
+    #    group-of boxes and scored with weight w per groundtruth box that is
+    #    matched.

    # Tp-fp evaluation for non-group of boxes (if any).
    if iou.shape[1] > 0:
...
...
@@ -473,18 +479,29 @@ class PerImageEvaluation(object):
      else:
        is_matched_to_difficult_box[i] = True

+    scores_group_of = np.zeros(ioa.shape[1], dtype=float)
+    tp_fp_labels_group_of = self.group_of_weight * np.ones(
+        ioa.shape[1], dtype=float)
    # Tp-fp evaluation for group of boxes.
-    if ioa.shape[0] > 0:
-      max_overlap_group_of_gt = np.max(ioa, axis=0)
+    if ioa.shape[1] > 0:
+      max_overlap_group_of_gt_ids = np.argmax(ioa, axis=1)
      for i in range(num_detected_boxes):
+        gt_id = max_overlap_group_of_gt_ids[i]
        if (not tp_fp_labels[i] and not is_matched_to_difficult_box[i] and
-            max_overlap_group_of_gt[i] >= self.matching_iou_threshold):
+            ioa[i, gt_id] >= self.matching_iou_threshold):
          is_matched_to_group_of_box[i] = True
-    return scores[~is_matched_to_difficult_box
-                  & ~is_matched_to_group_of_box], tp_fp_labels[
-                      ~is_matched_to_difficult_box & ~is_matched_to_group_of_box]
+          scores_group_of[gt_id] = max(scores_group_of[gt_id], scores[i])
+      selector = np.where((scores_group_of > 0) & (tp_fp_labels_group_of > 0))
+      scores_group_of = scores_group_of[selector]
+      tp_fp_labels_group_of = tp_fp_labels_group_of[selector]
+    return np.concatenate(
+        (scores[~is_matched_to_difficult_box & ~is_matched_to_group_of_box],
+         scores_group_of)), np.concatenate(
+             (tp_fp_labels[~is_matched_to_difficult_box
+                           & ~is_matched_to_group_of_box].astype(float),
+              tp_fp_labels_group_of))

  def _get_ith_class_arrays(self, detected_boxes, detected_scores,
                            detected_masks, detected_class_labels,
...
...
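Before the tests, a minimal usage sketch of the weighted matching, with invented boxes; `_compute_tp_fp_for_single_class` is the private helper the weighted tests below exercise directly, so this mirrors their calling convention rather than a public API:

import numpy as np
from object_detection.utils import per_image_evaluation

evaluator = per_image_evaluation.PerImageEvaluation(
    1,     # num_groundtruth_classes
    0.5,   # matching_iou_threshold
    1.0,   # nms_iou_threshold (1.0 keeps all boxes here)
    100,   # nms_max_output_boxes
    0.5)   # group_of_weight

detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 1]], dtype=float)
detected_scores = np.array([0.8, 0.5], dtype=float)
groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 5, 5]], dtype=float)
is_difficult = np.array([False, False], dtype=bool)
is_group_of = np.array([False, True], dtype=bool)  # second box is group-of

scores, tp_fp_labels = evaluator._compute_tp_fp_for_single_class(
    detected_boxes, detected_scores, groundtruth_boxes,
    is_difficult, is_group_of)
print(scores)        # [0.8 0.5]
print(tp_fp_labels)  # [1.  0.5] -- the group-of match earns group_of_weight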
research/object_detection/utils/per_image_evaluation_test.py
...
...
@@ -173,6 +173,7 @@ class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
        self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list)

    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
...
...
@@ -191,6 +192,7 @@ class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
        groundtruth_groundtruth_is_group_of_list,
        detected_masks=self.detected_masks,
        groundtruth_masks=self.groundtruth_masks)

    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
...
...
@@ -227,6 +229,122 @@ class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))


class SingleClassTpFpWithGroupOfBoxesTestWeighted(tf.test.TestCase):

  def setUp(self):
    num_groundtruth_classes = 1
    matching_iou_threshold = 0.5
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    self.group_of_weight = 0.5
    self.eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
        nms_max_output_boxes, self.group_of_weight)

    self.detected_boxes = np.array(
        [[0, 0, 1, 1], [0, 0, 2, 1], [0, 0, 3, 1]], dtype=float)
    self.detected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    detected_masks_0 = np.array(
        [[0, 1, 1, 0], [0, 0, 1, 0], [0, 0, 0, 0]], dtype=np.uint8)
    detected_masks_1 = np.array(
        [[1, 0, 0, 0], [1, 1, 0, 0], [0, 0, 0, 0]], dtype=np.uint8)
    detected_masks_2 = np.array(
        [[0, 0, 0, 0], [0, 1, 1, 0], [0, 1, 0, 0]], dtype=np.uint8)
    self.detected_masks = np.stack(
        [detected_masks_0, detected_masks_1, detected_masks_2], axis=0)

    self.groundtruth_boxes = np.array(
        [[0, 0, 1, 1], [0, 0, 5, 5], [10, 10, 20, 20]], dtype=float)
    groundtruth_masks_0 = np.array(
        [[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]], dtype=np.uint8)
    groundtruth_masks_1 = np.array(
        [[0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 1, 0]], dtype=np.uint8)
    groundtruth_masks_2 = np.array(
        [[0, 1, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0]], dtype=np.uint8)
    self.groundtruth_masks = np.stack(
        [groundtruth_masks_0, groundtruth_masks_1, groundtruth_masks_2],
        axis=0)

  def test_match_to_non_group_of_and_group_of_box(self):
    groundtruth_groundtruth_is_difficult_list = np.array(
        [False, False, False], dtype=bool)
    groundtruth_groundtruth_is_group_of_list = np.array(
        [False, True, True], dtype=bool)
    expected_scores = np.array([0.8, 0.6], dtype=float)
    expected_tp_fp_labels = np.array([1.0, self.group_of_weight], dtype=float)
    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list)

    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

  def test_mask_match_to_non_group_of_and_group_of_box(self):
    groundtruth_groundtruth_is_difficult_list = np.array(
        [False, False, False], dtype=bool)
    groundtruth_groundtruth_is_group_of_list = np.array(
        [False, True, True], dtype=bool)
    expected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
    expected_tp_fp_labels = np.array(
        [1.0, self.group_of_weight, self.group_of_weight], dtype=float)
    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list,
        detected_masks=self.detected_masks,
        groundtruth_masks=self.groundtruth_masks)
    tf.logging.info(
        "test_mask_match_to_non_group_of_and_group_of_box {} {}".format(
            tp_fp_labels, expected_tp_fp_labels))
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

  def test_match_two_to_group_of_box(self):
    groundtruth_groundtruth_is_difficult_list = np.array(
        [False, False, False], dtype=bool)
    groundtruth_groundtruth_is_group_of_list = np.array(
        [True, False, True], dtype=bool)
    expected_scores = np.array([0.5, 0.8], dtype=float)
    expected_tp_fp_labels = np.array([0.0, self.group_of_weight], dtype=float)
    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list)
    tf.logging.info("test_match_two_to_group_of_box {} {}".format(
        tp_fp_labels, expected_tp_fp_labels))
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

  def test_mask_match_two_to_group_of_box(self):
    groundtruth_groundtruth_is_difficult_list = np.array(
        [False, False, False], dtype=bool)
    groundtruth_groundtruth_is_group_of_list = np.array(
        [True, False, True], dtype=bool)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array(
        [1.0, self.group_of_weight, self.group_of_weight], dtype=float)
    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list,
        detected_masks=self.detected_masks,
        groundtruth_masks=self.groundtruth_masks)
    tf.logging.info("test_mask_match_two_to_group_of_box {} {}".format(
        tp_fp_labels, expected_tp_fp_labels))
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))


class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):

  def setUp(self):
...
...
@@ -439,5 +557,5 @@ class CorLocTest(tf.test.TestCase):
            is_class_correctly_detected_in_image))


-if __name__ == '__main__':
+if __name__ == "__main__":
  tf.test.main()
research/object_detection/utils/per_image_vrd_evaluation.py
0 → 100644
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluates Visual Relations Detection(VRD) result evaluation on an image.
Annotate each VRD result as true positives or false positive according to
a predefined IOU ratio. Multi-class detection is supported by default.
Based on the settings, per image evaluation is performed either on phrase
detection subtask or on relation detection subtask.
"""
import numpy as np

from object_detection.utils import np_box_list
from object_detection.utils import np_box_list_ops


class PerImageVRDEvaluation(object):
  """Evaluates the VRD result of a single image."""
  def __init__(self, matching_iou_threshold=0.5):
    """Initializes PerImageVRDEvaluation with evaluation parameters.

    Args:
      matching_iou_threshold: A ratio of area intersection to union, which is
        the threshold to consider whether a detection is true positive or not,
        used in the phrase detection subtask.
    """
    self.matching_iou_threshold = matching_iou_threshold
  def compute_detection_tp_fp(self, detected_box_tuples, detected_scores,
                              detected_class_tuples, groundtruth_box_tuples,
                              groundtruth_class_tuples):
    """Evaluates VRD detections on a single image as tp/fp.

    Args:
      detected_box_tuples: A numpy array of structures with shape [N,],
        representing N tuples, each tuple containing the same number of named
        bounding boxes. Each box is of the format
        [y_min, x_min, y_max, x_max].
      detected_scores: A float numpy array of shape [N,], representing
        the confidence scores of the detected N object instances.
      detected_class_tuples: A numpy array of structures of shape [N,],
        representing the class labels of the corresponding bounding boxes and
        possibly additional classes.
      groundtruth_box_tuples: A float numpy array of structures with the shape
        [M,], representing M tuples, each tuple containing the same number
        of named bounding boxes. Each box is of the format
        [y_min, x_min, y_max, x_max].
      groundtruth_class_tuples: A numpy array of structures of shape [M,],
        representing the class labels of the corresponding bounding boxes and
        possibly additional classes.

    Returns:
      scores: A single numpy array with shape [N,], representing N scores
        detected with object class, sorted in descending order.
      tp_fp_labels: A single boolean numpy array of shape [N,], representing N
        True/False positive labels, one label per tuple. The labels are sorted
        so that the order of the labels matches the order of the scores.
    """
    scores, tp_fp_labels = self._compute_tp_fp(
        detected_box_tuples=detected_box_tuples,
        detected_scores=detected_scores,
        detected_class_tuples=detected_class_tuples,
        groundtruth_box_tuples=groundtruth_box_tuples,
        groundtruth_class_tuples=groundtruth_class_tuples)

    return scores, tp_fp_labels
  def _compute_tp_fp(self, detected_box_tuples, detected_scores,
                     detected_class_tuples, groundtruth_box_tuples,
                     groundtruth_class_tuples):
    """Labels detection tuples as true/false positives across all classes.

    Args:
      detected_box_tuples: A numpy array of structures with shape [N,],
        representing N tuples, each tuple containing the same number of named
        bounding boxes. Each box is of the format
        [y_min, x_min, y_max, x_max].
      detected_scores: A float numpy array of shape [N,], representing
        the confidence scores of the detected N object instances.
      detected_class_tuples: A numpy array of structures of shape [N,],
        representing the class labels of the corresponding bounding boxes and
        possibly additional classes.
      groundtruth_box_tuples: A float numpy array of structures with the shape
        [M,], representing M tuples, each tuple containing the same number
        of named bounding boxes. Each box is of the format
        [y_min, x_min, y_max, x_max].
      groundtruth_class_tuples: A numpy array of structures of shape [M,],
        representing the class labels of the corresponding bounding boxes and
        possibly additional classes.

    Returns:
      scores: A single numpy array with shape [N,], representing N scores
        detected with object class, sorted in descending order.
      tp_fp_labels: A single boolean numpy array of shape [N,], representing N
        True/False positive labels, one label per tuple. The labels are sorted
        so that the order of the labels matches the order of the scores.
    """
    unique_gt_tuples = np.unique(
        np.concatenate((groundtruth_class_tuples, detected_class_tuples)))
    result_scores = []
    result_tp_fp_labels = []
    for unique_tuple in unique_gt_tuples:
      detections_selector = (detected_class_tuples == unique_tuple)
      gt_selector = (groundtruth_class_tuples == unique_tuple)
      scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
          detected_box_tuples=detected_box_tuples[detections_selector],
          detected_scores=detected_scores[detections_selector],
          groundtruth_box_tuples=groundtruth_box_tuples[gt_selector])
      result_scores.append(scores)
      result_tp_fp_labels.append(tp_fp_labels)
    result_scores = np.concatenate(result_scores)
    result_tp_fp_labels = np.concatenate(result_tp_fp_labels)
    sorted_indices = np.argsort(result_scores)
    sorted_indices = sorted_indices[::-1]
    return result_scores[sorted_indices], result_tp_fp_labels[sorted_indices]
  def _get_overlaps_and_scores_relation_tuples(self, detected_box_tuples,
                                               detected_scores,
                                               groundtruth_box_tuples):
    """Computes overlaps and scores between detected and groundtruth tuples.

    Both detections and groundtruth boxes have the same class tuples.

    Args:
      detected_box_tuples: A numpy array of structures with shape [N,],
        representing N tuples, each tuple containing the same number of named
        bounding boxes. Each box is of the format
        [y_min, x_min, y_max, x_max].
      detected_scores: A float numpy array of shape [N,], representing
        the confidence scores of the detected N object instances.
      groundtruth_box_tuples: A float numpy array of structures with the shape
        [M,], representing M tuples, each tuple containing the same number
        of named bounding boxes. Each box is of the format
        [y_min, x_min, y_max, x_max].

    Returns:
      result_iou: A float numpy array of size
        [num_detected_tuples, num_gt_box_tuples].
      scores: The score of the detected boxlist.
    """
    result_iou = np.ones(
        (detected_box_tuples.shape[0], groundtruth_box_tuples.shape[0]),
        dtype=float)
    for field in detected_box_tuples.dtype.fields:
      detected_boxlist_field = np_box_list.BoxList(detected_box_tuples[field])
      detected_boxlist_field.add_field('scores', detected_scores)
      detected_boxlist_field = np_box_list_ops.sort_by_field(
          detected_boxlist_field, 'scores')
      gt_boxlist_field = np_box_list.BoxList(groundtruth_box_tuples[field])
      iou_field = np_box_list_ops.iou(detected_boxlist_field,
                                      gt_boxlist_field)
      result_iou = np.minimum(iou_field, result_iou)
    scores = detected_boxlist_field.get_field('scores')
    return result_iou, scores
  def _compute_tp_fp_for_single_class(self, detected_box_tuples,
                                      detected_scores,
                                      groundtruth_box_tuples):
    """Labels boxes detected with the same class from the same image as tp/fp.

    Args:
      detected_box_tuples: A numpy array of structures with shape [N,],
        representing N tuples, each tuple containing the same number of named
        bounding boxes. Each box is of the format
        [y_min, x_min, y_max, x_max].
      detected_scores: A float numpy array of shape [N,], representing
        the confidence scores of the detected N object instances.
      groundtruth_box_tuples: A float numpy array of structures with the shape
        [M,], representing M tuples, each tuple containing the same number
        of named bounding boxes. Each box is of the format
        [y_min, x_min, y_max, x_max].

    Returns:
      Two arrays of the same size, containing true/false for N boxes that were
      evaluated as being true positives or false positives:
      scores: A numpy array representing the detection scores.
      tp_fp_labels: A boolean numpy array indicating whether a detection is a
        true positive.
    """
    if detected_box_tuples.size == 0:
      return np.array([], dtype=float), np.array([], dtype=bool)

    min_iou, scores = self._get_overlaps_and_scores_relation_tuples(
        detected_box_tuples=detected_box_tuples,
        detected_scores=detected_scores,
        groundtruth_box_tuples=groundtruth_box_tuples)

    num_detected_tuples = detected_box_tuples.shape[0]
    tp_fp_labels = np.zeros(num_detected_tuples, dtype=bool)
    if min_iou.shape[1] > 0:
      max_overlap_gt_ids = np.argmax(min_iou, axis=1)
      is_gt_tuple_detected = np.zeros(min_iou.shape[1], dtype=bool)
      for i in range(num_detected_tuples):
        gt_id = max_overlap_gt_ids[i]
        if min_iou[i, gt_id] >= self.matching_iou_threshold:
          if not is_gt_tuple_detected[gt_id]:
            tp_fp_labels[i] = True
            is_gt_tuple_detected[gt_id] = True

    return scores, tp_fp_labels
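A minimal sketch of feeding this evaluator, under the assumption that the box and class tuples are numpy structured arrays whose field names (here 'subject' and 'object', both invented) are arbitrary; the loop over `dtype.fields` above takes the per-field minimum IoU, so a detection counts only if every named box overlaps its groundtruth counterpart at once:

import numpy as np
from object_detection.utils import per_image_vrd_evaluation

# Hypothetical structured dtypes; any named boxes with the same fields work.
box_tuple_dtype = np.dtype([('subject', 'f4', (4,)), ('object', 'f4', (4,))])
label_tuple_dtype = np.dtype([('subject', 'i4'), ('object', 'i4')])

detected_box_tuples = np.array(
    [([0, 0, 1, 1], [1, 1, 2, 2])], dtype=box_tuple_dtype)
detected_scores = np.array([0.8], dtype=float)
detected_class_tuples = np.array([(1, 2)], dtype=label_tuple_dtype)
groundtruth_box_tuples = np.array(
    [([0, 0, 1, 1], [1, 1, 2, 2])], dtype=box_tuple_dtype)
groundtruth_class_tuples = np.array([(1, 2)], dtype=label_tuple_dtype)

evaluator = per_image_vrd_evaluation.PerImageVRDEvaluation(
    matching_iou_threshold=0.5)
scores, tp_fp_labels = evaluator.compute_detection_tp_fp(
    detected_box_tuples, detected_scores, detected_class_tuples,
    groundtruth_box_tuples, groundtruth_class_tuples)
print(scores, tp_fp_labels)  # [0.8] [ True] -- both named boxes overlap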