ModelZoo / ResNet50_tensorflow / Commits / 97760186

Unverified commit 97760186, authored Jun 05, 2018 by Jonathan Huang, committed by GitHub on Jun 05, 2018.

Merge pull request #4460 from pkulzc/master
Release evaluation code for OI Challenge 2018 and minor fixes.

Parents: ed901b73, a703fc0c
Showing 19 changed files with 1234 additions and 118 deletions (+1234 −118).
research/object_detection/models/ssd_mobilenet_v2_feature_extractor.py  +12 −18
research/object_detection/models/ssd_mobilenet_v2_feature_extractor_test.py  +3 −3
research/object_detection/protos/image_resizer.proto  +4 −0
research/object_detection/protos/input_reader.proto  +4 −0
research/object_detection/trainer.py  +13 −2
research/object_detection/utils/config_util.py  +87 −20
research/object_detection/utils/config_util_test.py  +46 −0
research/object_detection/utils/dataset_util.py  +12 −6
research/object_detection/utils/dataset_util_test.py  +45 −0
research/object_detection/utils/ops.py  +33 −19
research/object_detection/utils/ops_test.py  +10 −0
research/object_detection/utils/per_image_vrd_evaluation.py  +32 −31
research/object_detection/utils/per_image_vrd_evaluation_test.py  +9 −13
research/object_detection/utils/test_utils.py  +33 −0
research/object_detection/utils/test_utils_test.py  +16 −0
research/object_detection/utils/visualization_utils.py  +19 −6
research/object_detection/utils/visualization_utils_test.py  +29 −0
research/object_detection/utils/vrd_evaluation.py  +572 −0
research/object_detection/utils/vrd_evaluation_test.py  +255 −0
research/object_detection/models/ssd_mobilenet_v2_feature_extractor.py

@@ -112,24 +112,18 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
       with (slim.arg_scope(self._conv_hyperparams_fn())
             if self._override_base_feature_extractor_hyperparams
             else context_manager.IdentityContextManager()):
-        # TODO(b/68150321): Enable fused batch norm once quantization
-        # supports it.
-        with slim.arg_scope([slim.batch_norm], fused=False):
-          _, image_features = mobilenet_v2.mobilenet_base(
-              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
-              final_endpoint='layer_19',
-              depth_multiplier=self._depth_multiplier,
-              use_explicit_padding=self._use_explicit_padding,
-              scope=scope)
+        _, image_features = mobilenet_v2.mobilenet_base(
+            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
+            final_endpoint='layer_19',
+            depth_multiplier=self._depth_multiplier,
+            use_explicit_padding=self._use_explicit_padding,
+            scope=scope)
       with slim.arg_scope(self._conv_hyperparams_fn()):
-        # TODO(b/68150321): Enable fused batch norm once quantization
-        # supports it.
-        with slim.arg_scope([slim.batch_norm], fused=False):
-          feature_maps = feature_map_generators.multi_resolution_feature_maps(
-              feature_map_layout=feature_map_layout,
-              depth_multiplier=self._depth_multiplier,
-              min_depth=self._min_depth,
-              insert_1x1_conv=True,
-              image_features=image_features)
+        feature_maps = feature_map_generators.multi_resolution_feature_maps(
+            feature_map_layout=feature_map_layout,
+            depth_multiplier=self._depth_multiplier,
+            min_depth=self._min_depth,
+            insert_1x1_conv=True,
+            image_features=image_features)
     return feature_maps.values()
research/object_detection/models/ssd_mobilenet_v2_feature_extractor_test.py

@@ -135,7 +135,7 @@ class SsdMobilenetV2FeatureExtractorTest(
     self.check_feature_extractor_variables_under_scope(
         depth_multiplier, pad_to_multiple, scope_name)

-  def test_nofused_batchnorm(self):
+  def test_has_fused_batchnorm(self):
     image_height = 40
     image_width = 40
     depth_multiplier = 1
@@ -146,8 +146,8 @@ class SsdMobilenetV2FeatureExtractorTest(
         pad_to_multiple)
     preprocessed_image = feature_extractor.preprocess(image_placeholder)
     _ = feature_extractor.extract_features(preprocessed_image)
-    self.assertFalse(any(op.type == 'FusedBatchNorm'
-                         for op in tf.get_default_graph().get_operations()))
+    self.assertTrue(any(op.type == 'FusedBatchNorm'
+                        for op in tf.get_default_graph().get_operations()))

 if __name__ == '__main__':
research/object_detection/protos/image_resizer.proto

@@ -37,6 +37,10 @@ message KeepAspectRatioResizer {
   // Whether to also resize the image channels from 3 to 1 (RGB to grayscale).
   optional bool convert_to_grayscale = 5 [default = false];
+
+  // Per-channel pad value. This is only used when pad_to_max_dimension is True.
+  // If unspecified, a default pad value of 0 is applied to all channels.
+  repeated float per_channel_pad_value = 6;
 }

 // Configuration proto for image resizer that resizes to a fixed shape.
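The new field can be exercised from Python through the generated proto bindings. A minimal sketch, assuming ImageNet-style channel means as pad values (the numbers are illustrative, not from this commit):

    from object_detection.protos import image_resizer_pb2

    resizer = image_resizer_pb2.KeepAspectRatioResizer()
    resizer.pad_to_max_dimension = True
    # Hypothetical per-channel pad values; all channels default to 0
    # when this repeated field is left unset.
    resizer.per_channel_pad_value.extend([123.7, 116.8, 103.9])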
research/object_detection/protos/input_reader.proto

@@ -69,6 +69,10 @@ message InputReader {
   // Type of instance mask.
   optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];

+  // Whether to use the display name when decoding examples. This is only used
+  // when mapping class text strings to integers.
+  optional bool use_display_name = 17 [default = false];
+
   oneof input_reader {
     TFRecordInputReader tf_record_input_reader = 8;
     ExternalInputReader external_input_reader = 9;
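Analogously, a hedged sketch of toggling the new InputReader flag from Python (field number and default as in the hunk above):

    from object_detection.protos import input_reader_pb2

    reader_config = input_reader_pb2.InputReader()
    # Map label-map display_name strings to integers when decoding examples.
    reader_config.use_display_name = True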
research/object_detection/trainer.py

@@ -235,6 +235,9 @@ def train(create_tensor_dict_fn,
       built (before optimization). This is helpful to perform additional changes
       to the training graph such as adding FakeQuant ops. The function should
       modify the default graph.
+
+  Raises:
+    ValueError: If both num_clones > 1 and train_config.sync_replicas is true.
   """

   detection_model = create_model_fn()
@@ -256,9 +259,16 @@ def train(create_tensor_dict_fn,
     with tf.device(deploy_config.variables_device()):
       global_step = slim.create_global_step()

+    if num_clones != 1 and train_config.sync_replicas:
+      raise ValueError('In Synchronous SGD mode num_clones must ',
+                       'be 1. Found num_clones: {}'.format(num_clones))
+    batch_size = train_config.batch_size // num_clones
+    if train_config.sync_replicas:
+      batch_size //= train_config.replicas_to_aggregate
     with tf.device(deploy_config.inputs_device()):
-      input_queue = create_input_queue(train_config.batch_size // num_clones,
-                                       create_tensor_dict_fn,
+      input_queue = create_input_queue(
+          batch_size, create_tensor_dict_fn,
           train_config.batch_queue_capacity,
           train_config.num_batch_queue_threads,
           train_config.prefetch_queue_capacity,
           data_augmentation_options)
@@ -377,7 +387,8 @@ def train(create_tensor_dict_fn,
             train_config.load_all_detection_checkpoint_vars))
       available_var_map = (
           variables_helper.get_variables_available_in_checkpoint(
-              var_map, train_config.fine_tune_checkpoint))
+              var_map, train_config.fine_tune_checkpoint,
+              include_global_step=False))
       init_saver = tf.train.Saver(available_var_map)
       def initializer_fn(sess):
         init_saver.restore(sess, train_config.fine_tune_checkpoint)
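The new queue sizing divides the configured batch size by the number of clones and, under synchronous SGD, by replicas_to_aggregate. A standalone restatement of that arithmetic with made-up numbers:

    def per_clone_batch_size(configured, num_clones, sync_replicas,
                             replicas_to_aggregate):
      # Mirrors the logic added to train() above; num_clones must be 1
      # when sync_replicas is enabled, or train() raises ValueError.
      batch_size = configured // num_clones
      if sync_replicas:
        batch_size //= replicas_to_aggregate
      return batch_size

    # 64 images per step, synchronous SGD over 4 aggregated replicas:
    assert per_clone_batch_size(64, 1, True, 4) == 16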
research/object_detection/utils/config_util.py

@@ -278,6 +278,19 @@ def get_learning_rate_type(optimizer_config):
   return optimizer_config.learning_rate.WhichOneof("learning_rate")

+def _is_generic_key(key):
+  """Determines whether the key starts with a generic config dictionary key."""
+  for prefix in [
+      "graph_rewriter_config",
+      "model",
+      "train_input_config",
+      "train_input_config",
+      "train_config"]:
+    if key.startswith(prefix + "."):
+      return True
+  return False
+
 def merge_external_params_with_configs(configs, hparams=None, **kwargs):
   """Updates `configs` dictionary based on supplied parameters.
@@ -287,6 +300,16 @@ def merge_external_params_with_configs(configs, hparams=None, **kwargs):
   experiment, one can use a single base config file, and update particular
   values.

+  There are two types of field overrides:
+  1. Strategy-based overrides, which update multiple relevant configuration
+  options. For example, updating `learning_rate` will update both the warmup and
+  final learning rates.
+  2. Generic key/value, which update a specific parameter based on namespaced
+  configuration keys. For example,
+  `model.ssd.loss.hard_example_miner.max_negatives_per_positive` will update the
+  hard example miner configuration for an SSD model config. Generic overrides
+  are automatically detected based on the namespaced keys.
+
   Args:
     configs: Dictionary of configuration objects. See outputs from
       get_configs_from_pipeline_file() or get_configs_from_multiple_files().
@@ -302,44 +325,42 @@ def merge_external_params_with_configs(configs, hparams=None, **kwargs):
   if hparams:
     kwargs.update(hparams.values())
   for key, value in kwargs.items():
+    tf.logging.info("Maybe overwriting %s: %s", key, value)
     # pylint: disable=g-explicit-bool-comparison
     if value == "" or value is None:
       continue
     # pylint: enable=g-explicit-bool-comparison
     if key == "learning_rate":
       _update_initial_learning_rate(configs, value)
       tf.logging.info("Overwriting learning rate: %f", value)
-    if key == "batch_size":
+    elif key == "batch_size":
       _update_batch_size(configs, value)
       tf.logging.info("Overwriting batch size: %d", value)
-    if key == "momentum_optimizer_value":
+    elif key == "momentum_optimizer_value":
       _update_momentum_optimizer_value(configs, value)
       tf.logging.info("Overwriting momentum optimizer value: %f", value)
-    if key == "classification_localization_weight_ratio":
+    elif key == "classification_localization_weight_ratio":
       # Localization weight is fixed to 1.0.
       _update_classification_localization_weight_ratio(configs, value)
-    if key == "focal_loss_gamma":
+    elif key == "focal_loss_gamma":
       _update_focal_loss_gamma(configs, value)
-    if key == "focal_loss_alpha":
+    elif key == "focal_loss_alpha":
       _update_focal_loss_alpha(configs, value)
-    if key == "train_steps":
+    elif key == "train_steps":
       _update_train_steps(configs, value)
       tf.logging.info("Overwriting train steps: %d", value)
-    if key == "eval_steps":
+    elif key == "eval_steps":
       _update_eval_steps(configs, value)
       tf.logging.info("Overwriting eval steps: %d", value)
-    if key == "train_input_path":
+    elif key == "train_input_path":
       _update_input_path(configs["train_input_config"], value)
       tf.logging.info("Overwriting train input path: %s", value)
-    if key == "eval_input_path":
+    elif key == "eval_input_path":
       _update_input_path(configs["eval_input_config"], value)
       tf.logging.info("Overwriting eval input path: %s", value)
-    if key == "label_map_path":
+    elif key == "label_map_path":
       _update_label_map_path(configs, value)
       tf.logging.info("Overwriting label map path: %s", value)
-    if key == "mask_type":
+    elif key == "mask_type":
       _update_mask_type(configs, value)
       tf.logging.info("Overwritten mask type: %s", value)
+    elif key == "eval_with_moving_averages":
+      _update_use_moving_averages(configs, value)
+    elif _is_generic_key(key):
+      _update_generic(configs, key, value)
+    else:
+      tf.logging.info("Ignoring config override key: %s", key)
   return configs
@@ -411,6 +432,38 @@ def _update_batch_size(configs, batch_size):
   configs["train_config"].batch_size = max(1, int(round(batch_size)))

+def _validate_message_has_field(message, field):
+  if not message.HasField(field):
+    raise ValueError("Expecting message to have field %s" % field)
+
+
+def _update_generic(configs, key, value):
+  """Update a pipeline configuration parameter based on a generic key/value.
+
+  Args:
+    configs: Dictionary of pipeline configuration protos.
+    key: A string key, dot-delimited to represent the argument key.
+      e.g. "model.ssd.train_config.batch_size"
+    value: A value to set the argument to. The type of the value must match the
+      type for the protocol buffer. Note that setting the wrong type will
+      result in a TypeError.
+      e.g. 42
+
+  Raises:
+    ValueError if the message key does not match the existing proto fields.
+    TypeError the value type doesn't match the protobuf field type.
+  """
+  fields = key.split(".")
+  first_field = fields.pop(0)
+  last_field = fields.pop()
+  message = configs[first_field]
+  for field in fields:
+    _validate_message_has_field(message, field)
+    message = getattr(message, field)
+  _validate_message_has_field(message, last_field)
+  setattr(message, last_field, value)
+
+
 def _update_momentum_optimizer_value(configs, momentum):
   """Updates `configs` to reflect the new momentum value.
@@ -587,3 +640,17 @@ def _update_mask_type(configs, mask_type):
   """
   configs["train_input_config"].mask_type = mask_type
   configs["eval_input_config"].mask_type = mask_type
+
+
+def _update_use_moving_averages(configs, use_moving_averages):
+  """Updates the eval config option to use or not use moving averages.
+
+  The configs dictionary is updated in place, and hence not returned.
+
+  Args:
+    configs: Dictionary of configuration objects. See outputs from
+      get_configs_from_pipeline_file() or get_configs_from_multiple_files().
+    use_moving_averages: Boolean indicating whether moving average variables
+      should be loaded during evaluation.
+  """
+  configs["eval_config"].use_moving_averages = use_moving_averages
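A sketch of the new generic override path, assuming a pipeline config has already been loaded; the dotted key below is illustrative:

    from object_detection.utils import config_util

    configs = config_util.get_configs_from_pipeline_file('pipeline.config')
    # 'train_config.batch_size' matches a generic prefix, so it is routed
    # through _update_generic; 'learning_rate' keeps its strategy handler.
    configs = config_util.merge_external_params_with_configs(
        configs,
        learning_rate=0.003,
        **{'train_config.batch_size': 8})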
research/object_detection/utils/config_util_test.py

@@ -69,6 +69,11 @@ def _update_optimizer_with_cosine_decay_learning_rate(
 class ConfigUtilTest(tf.test.TestCase):

+  def _create_and_load_test_configs(self, pipeline_config):
+    pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
+    _write_config(pipeline_config, pipeline_config_path)
+    return config_util.get_configs_from_pipeline_file(pipeline_config_path)
+
   def test_get_configs_from_pipeline_file(self):
     """Test that proto configs can be read from pipeline config file."""
     pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
@@ -307,6 +312,34 @@ class ConfigUtilTest(tf.test.TestCase):
     new_batch_size = configs["train_config"].batch_size
     self.assertEqual(1, new_batch_size)  # Clipped to 1.0.

+  def testOverwriteBatchSizeWithKeyValue(self):
+    """Tests that batch size is overwritten based on key/value."""
+    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+    pipeline_config.train_config.batch_size = 2
+    configs = self._create_and_load_test_configs(pipeline_config)
+    hparams = tf.contrib.training.HParams(**{"train_config.batch_size": 10})
+    configs = config_util.merge_external_params_with_configs(configs, hparams)
+    new_batch_size = configs["train_config"].batch_size
+    self.assertEqual(10, new_batch_size)
+
+  def testKeyValueOverrideBadKey(self):
+    """Tests that overwriting with a bad key causes an exception."""
+    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+    configs = self._create_and_load_test_configs(pipeline_config)
+    hparams = tf.contrib.training.HParams(**{"train_config.no_such_field": 10})
+    with self.assertRaises(ValueError):
+      config_util.merge_external_params_with_configs(configs, hparams)
+
+  def testOverwriteBatchSizeWithBadValueType(self):
+    """Tests that overwriting with a bad value type causes an exception."""
+    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+    pipeline_config.train_config.batch_size = 2
+    configs = self._create_and_load_test_configs(pipeline_config)
+    # Type should be an integer, but we're passing a string "10".
+    hparams = tf.contrib.training.HParams(**{"train_config.batch_size": "10"})
+    with self.assertRaises(TypeError):
+      config_util.merge_external_params_with_configs(configs, hparams)
+
   def testNewMomentumOptimizerValue(self):
     """Tests that new momentum value is updated appropriately."""
     original_momentum_value = 0.4
@@ -501,6 +534,19 @@ class ConfigUtilTest(tf.test.TestCase):
     self.assertEqual(new_mask_type, configs["train_input_config"].mask_type)
     self.assertEqual(new_mask_type, configs["eval_input_config"].mask_type)

+  def testUseMovingAverageForEval(self):
+    use_moving_averages_orig = False
+    pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
+    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+    pipeline_config.eval_config.use_moving_averages = use_moving_averages_orig
+    _write_config(pipeline_config, pipeline_config_path)
+
+    configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
+    configs = config_util.merge_external_params_with_configs(
+        configs, eval_with_moving_averages=True)
+    self.assertEqual(True, configs["eval_config"].use_moving_averages)
+
   def test_get_image_resizer_config(self):
     """Tests that number of classes can be retrieved."""
     model_config = model_pb2.DetectionModel()
research/object_detection/utils/dataset_util.py

@@ -117,13 +117,17 @@ def read_dataset(file_read_func, decode_func, input_files, config):
     A tf.data.Dataset based on config.
   """
   # Shard, shuffle, and read files.
-  filenames = tf.concat([tf.matching_files(pattern) for pattern in input_files],
-                        0)
-  filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
+  filenames = tf.gfile.Glob(input_files)
+  num_readers = config.num_readers
+  if num_readers > len(filenames):
+    num_readers = len(filenames)
+    tf.logging.warning('num_readers has been reduced to %d to match input file '
+                       'shards.' % num_readers)
+  filename_dataset = tf.data.Dataset.from_tensor_slices(tf.unstack(filenames))
   if config.shuffle:
     filename_dataset = filename_dataset.shuffle(
         config.filenames_shuffle_buffer_size)
-  elif config.num_readers > 1:
+  elif num_readers > 1:
     tf.logging.warning('`shuffle` is false, but the input data stream is '
                        'still slightly shuffled since `num_readers` > 1.')
@@ -131,8 +135,10 @@ def read_dataset(file_read_func, decode_func, input_files, config):
   records_dataset = filename_dataset.apply(
       tf.contrib.data.parallel_interleave(
-          file_read_func, cycle_length=config.num_readers,
-          block_length=config.read_block_length, sloppy=config.shuffle))
+          file_read_func,
+          cycle_length=num_readers,
+          block_length=config.read_block_length,
+          sloppy=config.shuffle))
   if config.shuffle:
     records_dataset = records_dataset.shuffle(config.shuffle_buffer_size)
   tensor_dataset = records_dataset.map(
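For illustration, the effect of the clamp on a hypothetical shard layout (pattern and counts invented):

    import tensorflow as tf

    filenames = tf.gfile.Glob(['data/train-*.record'])  # e.g. 4 shards on disk
    num_readers = 16                                    # e.g. config.num_readers
    if num_readers > len(filenames):
      # read_dataset now warns and clamps instead of creating idle readers.
      num_readers = len(filenames)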
research/object_detection/utils/dataset_util_test.py

@@ -16,6 +16,7 @@
 """Tests for object_detection.utils.dataset_util."""
 import os
+import numpy as np
 import tensorflow as tf

 from object_detection.protos import input_reader_pb2
@@ -32,6 +33,13 @@ class DatasetUtilTest(tf.test.TestCase):
       with tf.gfile.Open(path, 'wb') as f:
         f.write('\n'.join([str(i + 1), str((i + 1) * 10)]))

+    self._shuffle_path_template = os.path.join(self.get_temp_dir(),
+                                               'shuffle_%s.txt')
+    for i in range(2):
+      path = self._shuffle_path_template % i
+      with tf.gfile.Open(path, 'wb') as f:
+        f.write('\n'.join([str(i)] * 5))
+
   def _get_dataset_next(self, files, config, batch_size):
     def decode_func(value):
       return [tf.string_to_number(value, out_type=tf.int32)]
@@ -78,6 +86,43 @@ class DatasetUtilTest(tf.test.TestCase):
           [[1, 10, 2, 20, 3, 30, 4, 40, 5, 50,
             1, 10, 2, 20, 3, 30, 4, 40, 5, 50]])

+  def test_reduce_num_reader(self):
+    config = input_reader_pb2.InputReader()
+    config.num_readers = 10
+    config.shuffle = False
+
+    data = self._get_dataset_next([self._path_template % '*'], config,
+                                  batch_size=20)
+    with self.test_session() as sess:
+      self.assertAllEqual(sess.run(data),
+                          [[1, 10, 2, 20, 3, 30, 4, 40, 5, 50,
+                            1, 10, 2, 20, 3, 30, 4, 40, 5, 50]])
+
+  def test_enable_shuffle(self):
+    config = input_reader_pb2.InputReader()
+    config.num_readers = 1
+    config.shuffle = True
+
+    data = self._get_dataset_next(
+        [self._shuffle_path_template % '*'], config, batch_size=10)
+    expected_non_shuffle_output = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+    with self.test_session() as sess:
+      self.assertTrue(
+          np.any(np.not_equal(sess.run(data), expected_non_shuffle_output)))
+
+  def test_disable_shuffle_(self):
+    config = input_reader_pb2.InputReader()
+    config.num_readers = 1
+    config.shuffle = False
+
+    data = self._get_dataset_next(
+        [self._shuffle_path_template % '*'], config, batch_size=10)
+    expected_non_shuffle_output = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+    with self.test_session() as sess:
+      self.assertAllEqual(sess.run(data), [expected_non_shuffle_output])
+
   def test_read_dataset_single_epoch(self):
     config = input_reader_pb2.InputReader()
     config.num_epochs = 1
research/object_detection/utils/ops.py

@@ -318,8 +318,9 @@ def retain_groundtruth(tensor_dict, valid_indices):
   Args:
     tensor_dict: a dictionary of following groundtruth tensors -
       fields.InputDataFields.groundtruth_boxes
-      fields.InputDataFields.groundtruth_instance_masks
       fields.InputDataFields.groundtruth_classes
+      fields.InputDataFields.groundtruth_keypoints
+      fields.InputDataFields.groundtruth_instance_masks
       fields.InputDataFields.groundtruth_is_crowd
       fields.InputDataFields.groundtruth_area
       fields.InputDataFields.groundtruth_label_types
@@ -347,6 +348,7 @@ def retain_groundtruth(tensor_dict, valid_indices):
     for key in tensor_dict:
       if key in [fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_classes,
+                 fields.InputDataFields.groundtruth_keypoints,
                  fields.InputDataFields.groundtruth_instance_masks]:
         valid_dict[key] = tf.gather(tensor_dict[key], valid_indices)
       # Input decoder returns empty tensor when these fields are not provided.
@@ -374,6 +376,8 @@ def retain_groundtruth_with_positive_classes(tensor_dict):
     tensor_dict: a dictionary of following groundtruth tensors -
       fields.InputDataFields.groundtruth_boxes
       fields.InputDataFields.groundtruth_classes
+      fields.InputDataFields.groundtruth_keypoints
+      fields.InputDataFields.groundtruth_instance_masks
       fields.InputDataFields.groundtruth_is_crowd
       fields.InputDataFields.groundtruth_area
       fields.InputDataFields.groundtruth_label_types
@@ -413,6 +417,8 @@ def filter_groundtruth_with_crowd_boxes(tensor_dict):
     tensor_dict: a dictionary of following groundtruth tensors -
       fields.InputDataFields.groundtruth_boxes
       fields.InputDataFields.groundtruth_classes
+      fields.InputDataFields.groundtruth_keypoints
+      fields.InputDataFields.groundtruth_instance_masks
       fields.InputDataFields.groundtruth_is_crowd
       fields.InputDataFields.groundtruth_area
       fields.InputDataFields.groundtruth_label_types
@@ -435,8 +441,9 @@ def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
   Args:
     tensor_dict: a dictionary of following groundtruth tensors -
       fields.InputDataFields.groundtruth_boxes
-      fields.InputDataFields.groundtruth_instance_masks
       fields.InputDataFields.groundtruth_classes
+      fields.InputDataFields.groundtruth_keypoints
+      fields.InputDataFields.groundtruth_instance_masks
       fields.InputDataFields.groundtruth_is_crowd
       fields.InputDataFields.groundtruth_area
       fields.InputDataFields.groundtruth_label_types
@@ -703,23 +710,30 @@ def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
     A tf.float32 tensor of size [num_masks, image_height, image_width].
   """
   # TODO(rathodv): Make this a public function.
-  def transform_boxes_relative_to_boxes(boxes, reference_boxes):
-    boxes = tf.reshape(boxes, [-1, 2, 2])
-    min_corner = tf.expand_dims(reference_boxes[:, 0:2], 1)
-    max_corner = tf.expand_dims(reference_boxes[:, 2:4], 1)
-    transformed_boxes = (boxes - min_corner) / (max_corner - min_corner)
-    return tf.reshape(transformed_boxes, [-1, 4])
-
-  box_masks = tf.expand_dims(box_masks, axis=3)
-  num_boxes = tf.shape(box_masks)[0]
-  unit_boxes = tf.concat(
-      [tf.zeros([num_boxes, 2]), tf.ones([num_boxes, 2])], axis=1)
-  reverse_boxes = transform_boxes_relative_to_boxes(unit_boxes, boxes)
-  image_masks = tf.image.crop_and_resize(image=box_masks,
-                                         boxes=reverse_boxes,
-                                         box_ind=tf.range(num_boxes),
-                                         crop_size=[image_height, image_width],
-                                         extrapolation_value=0.0)
+  def reframe_box_masks_to_image_masks_default():
+    """The default function when there are more than 0 box masks."""
+    def transform_boxes_relative_to_boxes(boxes, reference_boxes):
+      boxes = tf.reshape(boxes, [-1, 2, 2])
+      min_corner = tf.expand_dims(reference_boxes[:, 0:2], 1)
+      max_corner = tf.expand_dims(reference_boxes[:, 2:4], 1)
+      transformed_boxes = (boxes - min_corner) / (max_corner - min_corner)
+      return tf.reshape(transformed_boxes, [-1, 4])
+
+    box_masks_expanded = tf.expand_dims(box_masks, axis=3)
+    num_boxes = tf.shape(box_masks_expanded)[0]
+    unit_boxes = tf.concat(
+        [tf.zeros([num_boxes, 2]), tf.ones([num_boxes, 2])], axis=1)
+    reverse_boxes = transform_boxes_relative_to_boxes(unit_boxes, boxes)
+    return tf.image.crop_and_resize(
+        image=box_masks_expanded,
+        boxes=reverse_boxes,
+        box_ind=tf.range(num_boxes),
+        crop_size=[image_height, image_width],
+        extrapolation_value=0.0)
+  image_masks = tf.cond(
+      tf.shape(box_masks)[0] > 0,
+      reframe_box_masks_to_image_masks_default,
+      lambda: tf.zeros([0, image_height, image_width, 1], dtype=tf.float32))
   return tf.squeeze(image_masks, axis=3)
research/object_detection/utils/ops_test.py

@@ -1100,6 +1100,16 @@ class ReframeBoxMasksToImageMasksTest(tf.test.TestCase):
       np_image_masks = sess.run(image_masks)
       self.assertAllClose(np_image_masks, np_expected_image_masks)

+  def testZeroBoxMasks(self):
+    box_masks = tf.zeros([0, 3, 3], dtype=tf.float32)
+    boxes = tf.zeros([0, 4], dtype=tf.float32)
+    image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
+                                                       image_height=4,
+                                                       image_width=4)
+    with self.test_session() as sess:
+      np_image_masks = sess.run(image_masks)
+      self.assertAllEqual(np_image_masks.shape, np.array([0, 4, 4]))
+
   def testMaskIsCenteredInImageWhenBoxIsCentered(self):
     box_masks = tf.constant([[[1, 1], [1, 1]]], dtype=tf.float32)
research/object_detection/utils/per_image_vrd_evaluation.py

@@ -67,16 +67,18 @@ class PerImageVRDEvaluation(object):
       tp_fp_labels: A single boolean numpy array of shape [N,], representing N
         True/False positive label, one label per tuple. The labels are sorted
         so that the order of the labels matches the order of the scores.
+      result_mapping: A numpy array with shape [N,] with original index of each
+        entry.
     """
-    scores, tp_fp_labels = self._compute_tp_fp(
+    scores, tp_fp_labels, result_mapping = self._compute_tp_fp(
         detected_box_tuples=detected_box_tuples,
         detected_scores=detected_scores,
         detected_class_tuples=detected_class_tuples,
         groundtruth_box_tuples=groundtruth_box_tuples,
         groundtruth_class_tuples=groundtruth_class_tuples)

-    return scores, tp_fp_labels
+    return scores, tp_fp_labels, result_mapping

   def _compute_tp_fp(self, detected_box_tuples, detected_scores,
                      detected_class_tuples, groundtruth_box_tuples,
@@ -107,33 +109,46 @@ class PerImageVRDEvaluation(object):
       tp_fp_labels: A single boolean numpy array of shape [N,], representing N
         True/False positive label, one label per tuple. The labels are sorted
         so that the order of the labels matches the order of the scores.
+      result_mapping: A numpy array with shape [N,] with original index of each
+        entry.
     """
     unique_gt_tuples = np.unique(
         np.concatenate((groundtruth_class_tuples, detected_class_tuples)))
     result_scores = []
     result_tp_fp_labels = []
+    result_mapping = []

     for unique_tuple in unique_gt_tuples:
       detections_selector = (detected_class_tuples == unique_tuple)
       gt_selector = (groundtruth_class_tuples == unique_tuple)

-      scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
-          detected_box_tuples=detected_box_tuples[detections_selector],
-          detected_scores=detected_scores[detections_selector],
+      selector_mapping = np.where(detections_selector)[0]
+
+      detection_scores_per_tuple = detected_scores[detections_selector]
+      detection_box_per_tuple = detected_box_tuples[detections_selector]
+
+      sorted_indices = np.argsort(detection_scores_per_tuple)
+      sorted_indices = sorted_indices[::-1]
+
+      tp_fp_labels = self._compute_tp_fp_for_single_class(
+          detected_box_tuples=detection_box_per_tuple[sorted_indices],
           groundtruth_box_tuples=groundtruth_box_tuples[gt_selector])

-      result_scores.append(scores)
+      result_scores.append(detection_scores_per_tuple[sorted_indices])
       result_tp_fp_labels.append(tp_fp_labels)
+      result_mapping.append(selector_mapping[sorted_indices])

     result_scores = np.concatenate(result_scores)
     result_tp_fp_labels = np.concatenate(result_tp_fp_labels)
+    result_mapping = np.concatenate(result_mapping)

     sorted_indices = np.argsort(result_scores)
     sorted_indices = sorted_indices[::-1]

-    return result_scores[sorted_indices], result_tp_fp_labels[sorted_indices]
+    return result_scores[sorted_indices], result_tp_fp_labels[
+        sorted_indices], result_mapping[sorted_indices]

-  def _get_overlaps_and_scores_relation_tuples(self, detected_box_tuples,
-                                               detected_scores,
-                                               groundtruth_box_tuples):
+  def _get_overlaps_and_scores_relation_tuples(self, detected_box_tuples,
+                                               groundtruth_box_tuples):
     """Computes overlaps and scores between detected and groundtruth tuples.

     Both detections and groundtruth boxes have the same class tuples.
@@ -143,8 +158,6 @@ class PerImageVRDEvaluation(object):
         representing N tuples, each tuple containing the same number of named
         bounding boxes.
         Each box is of the format [y_min, x_min, y_max, x_max]
-      detected_scores: A float numpy array of shape [N,], representing
-        the confidence scores of the detected N object instances.
       groundtruth_box_tuples: A float numpy array of structures with the shape
         [M,], representing M tuples, each tuple containing the same number
         of named bounding boxes.
@@ -153,7 +166,6 @@ class PerImageVRDEvaluation(object):
     Returns:
       result_iou: A float numpy array of size
         [num_detected_tuples, num_gt_box_tuples].
-      scores: The score of the detected boxlist.
     """
     result_iou = np.ones(
@@ -161,46 +173,35 @@ class PerImageVRDEvaluation(object):
         dtype=float)
     for field in detected_box_tuples.dtype.fields:
       detected_boxlist_field = np_box_list.BoxList(detected_box_tuples[field])
-      detected_boxlist_field.add_field('scores', detected_scores)
-      detected_boxlist_field = np_box_list_ops.sort_by_field(
-          detected_boxlist_field, 'scores')
       gt_boxlist_field = np_box_list.BoxList(groundtruth_box_tuples[field])
       iou_field = np_box_list_ops.iou(detected_boxlist_field, gt_boxlist_field)
       result_iou = np.minimum(iou_field, result_iou)
-    scores = detected_boxlist_field.get_field('scores')
-    return result_iou, scores
+    return result_iou

-  def _compute_tp_fp_for_single_class(self, detected_box_tuples,
-                                      detected_scores,
-                                      groundtruth_box_tuples):
+  def _compute_tp_fp_for_single_class(self, detected_box_tuples,
+                                      groundtruth_box_tuples):
     """Labels boxes detected with the same class from the same image as tp/fp.

+    Detection boxes are expected to be already sorted by score.
+
     Args:
       detected_box_tuples: A numpy array of structures with shape [N,],
         representing N tuples, each tuple containing the same number of named
         bounding boxes.
         Each box is of the format [y_min, x_min, y_max, x_max]
-      detected_scores: A float numpy array of shape [N,], representing
-        the confidence scores of the detected N object instances.
       groundtruth_box_tuples: A float numpy array of structures with the shape
         [M,], representing M tuples, each tuple containing the same number
         of named bounding boxes.
         Each box is of the format [y_min, x_min, y_max, x_max]

     Returns:
-      Two arrays of the same size, containing true/false for N boxes that were
-      evaluated as being true positives or false positives;
-      scores: A numpy array representing the detection scores.
       tp_fp_labels: a boolean numpy array indicating whether a detection is a
         true positive.
     """
     if detected_box_tuples.size == 0:
-      return np.array([], dtype=float), np.array([], dtype=bool)
+      return np.array([], dtype=bool)

-    min_iou, scores = self._get_overlaps_and_scores_relation_tuples(
-        detected_box_tuples=detected_box_tuples,
-        detected_scores=detected_scores,
-        groundtruth_box_tuples=groundtruth_box_tuples)
+    min_iou = self._get_overlaps_and_scores_relation_tuples(
+        detected_box_tuples, groundtruth_box_tuples)

     num_detected_tuples = detected_box_tuples.shape[0]
     tp_fp_labels = np.zeros(num_detected_tuples, dtype=bool)
@@ -215,4 +216,4 @@ class PerImageVRDEvaluation(object):
           tp_fp_labels[i] = True
           is_gt_tuple_detected[gt_id] = True

-    return scores, tp_fp_labels
+    return tp_fp_labels
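The extra result_mapping return value records the original index of each score-sorted entry, which lets callers scatter labels back to input order. A small numpy sketch using the values from the updated test below:

    import numpy as np

    scores = np.array([0.8, 0.5, 0.2, 0.1])
    tp_fp_labels = np.array([True, True, False, False])
    result_mapping = np.array([1, 3, 0, 2])

    # Recover per-detection labels in the original detection order.
    labels_in_input_order = np.empty_like(tp_fp_labels)
    labels_in_input_order[result_mapping] = tp_fp_labels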
research/object_detection/utils/per_image_vrd_evaluation_test.py

@@ -28,31 +28,25 @@ class SingleClassPerImageVrdEvaluationTest(tf.test.TestCase):
     box_data_type = np.dtype([('subject', 'f4', (4,)), ('object', 'f4', (4,))])
     self.detected_box_tuples = np.array(
-        [([0, 0, 1, 1], [1, 1, 2, 2]), ([0, 0, 1.1, 1], [1, 1, 2, 2]),
+        [([0, 0, 1.1, 1], [1, 1, 2, 2]), ([0, 0, 1, 1], [1, 1, 2, 2]),
          ([1, 1, 2, 2], [0, 0, 1.1, 1])],
         dtype=box_data_type)
-    self.detected_scores = np.array([0.2, 0.8, 0.1], dtype=float)
+    self.detected_scores = np.array([0.8, 0.2, 0.1], dtype=float)
     self.groundtruth_box_tuples = np.array(
         [([0, 0, 1, 1], [1, 1, 2, 2])], dtype=box_data_type)

   def test_tp_fp_eval(self):
-    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
-        self.detected_box_tuples, self.detected_scores,
-        self.groundtruth_box_tuples)
-    expected_scores = np.array([0.8, 0.2, 0.1], dtype=float)
+    tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
+        self.detected_box_tuples, self.groundtruth_box_tuples)
     expected_tp_fp_labels = np.array([True, False, False], dtype=bool)
-    self.assertTrue(np.allclose(expected_scores, scores))
     self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

   def test_tp_fp_eval_empty_gt(self):
     box_data_type = np.dtype([('subject', 'f4', (4,)), ('object', 'f4', (4,))])
-    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
-        self.detected_box_tuples, self.detected_scores,
-        np.array([], dtype=box_data_type))
-    expected_scores = np.array([0.8, 0.2, 0.1], dtype=float)
+    tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
+        self.detected_box_tuples, np.array([], dtype=box_data_type))
     expected_tp_fp_labels = np.array([False, False, False], dtype=bool)
-    self.assertTrue(np.allclose(expected_scores, scores))
     self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
@@ -82,16 +76,18 @@ class MultiClassPerImageVrdEvaluationTest(tf.test.TestCase):
         [(1, 2, 3), (1, 7, 3), (1, 4, 5)], dtype=label_data_type)

   def test_tp_fp_eval(self):
-    scores, tp_fp_labels = self.eval.compute_detection_tp_fp(
+    scores, tp_fp_labels, mapping = self.eval.compute_detection_tp_fp(
         self.detected_box_tuples, self.detected_scores,
         self.detected_class_tuples, self.groundtruth_box_tuples,
         self.groundtruth_class_tuples)

     expected_scores = np.array([0.8, 0.5, 0.2, 0.1], dtype=float)
     expected_tp_fp_labels = np.array([True, True, False, False], dtype=bool)
+    expected_mapping = np.array([1, 3, 0, 2])

     self.assertTrue(np.allclose(expected_scores, scores))
     self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
+    self.assertTrue(np.allclose(expected_mapping, mapping))

 if __name__ == '__main__':
research/object_detection/utils/test_utils.py

@@ -138,3 +138,36 @@ def create_random_boxes(num_boxes, max_height, max_width):
   boxes[:, 3] = np.maximum(x_1, x_2)

   return boxes.astype(np.float32)
+
+
+def first_rows_close_as_set(a, b, k=None, rtol=1e-6, atol=1e-6):
+  """Checks if first K entries of two lists are close, up to permutation.
+
+  Inputs to this assert are lists of items which can be compared via
+  numpy.allclose(...) and can be sorted.
+
+  Args:
+    a: list of items which can be compared via numpy.allclose(...) and are
+      sortable.
+    b: list of items which can be compared via numpy.allclose(...) and are
+      sortable.
+    k: a non-negative integer. If not provided, k is set to be len(a).
+    rtol: relative tolerance.
+    atol: absolute tolerance.
+
+  Returns:
+    boolean, True if input lists a and b have the same length and
+    the first k entries of the inputs satisfy numpy.allclose() after
+    sorting entries.
+  """
+  if not isinstance(a, list) or not isinstance(b, list) or len(a) != len(b):
+    return False
+  if not k:
+    k = len(a)
+  k = min(k, len(a))
+  a_sorted = sorted(a[:k])
+  b_sorted = sorted(b[:k])
+  return all([
+      np.allclose(entry_a, entry_b, rtol, atol)
+      for (entry_a, entry_b) in zip(a_sorted, b_sorted)
+  ])
research/object_detection/utils/test_utils_test.py

@@ -68,6 +68,22 @@ class TestUtilsTest(tf.test.TestCase):
     self.assertTrue(boxes[:, 2].max() <= max_height)
     self.assertTrue(boxes[:, 3].max() <= max_width)

+  def test_first_rows_close_as_set(self):
+    a = [1, 2, 3, 0, 0]
+    b = [3, 2, 1, 0, 0]
+    k = 3
+    self.assertTrue(test_utils.first_rows_close_as_set(a, b, k))
+
+    a = [[1, 2], [1, 4], [0, 0]]
+    b = [[1, 4 + 1e-9], [1, 2], [0, 0]]
+    k = 2
+    self.assertTrue(test_utils.first_rows_close_as_set(a, b, k))
+
+    a = [[1, 2], [1, 4], [0, 0]]
+    b = [[1, 4 + 1e-9], [2, 2], [0, 0]]
+    k = 2
+    self.assertFalse(test_utils.first_rows_close_as_set(a, b, k))
+
 if __name__ == '__main__':
   tf.test.main()
research/object_detection/utils/visualization_utils.py

@@ -315,11 +315,13 @@ def draw_bounding_boxes_on_image_tensors(images,
                                          instance_masks=None,
                                          keypoints=None,
                                          max_boxes_to_draw=20,
-                                         min_score_thresh=0.2):
+                                         min_score_thresh=0.2,
+                                         use_normalized_coordinates=True):
   """Draws bounding boxes, masks, and keypoints on batch of image tensors.

   Args:
-    images: A 4D uint8 image tensor of shape [N, H, W, C].
+    images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional
+      channels will be ignored.
     boxes: [N, max_detections, 4] float32 tensor of detection boxes.
     classes: [N, max_detections] int tensor of detection classes. Note that
       classes are 1-indexed.
@@ -332,12 +334,17 @@ def draw_bounding_boxes_on_image_tensors(images,
       with keypoints.
     max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20.
     min_score_thresh: Minimum score threshold for visualization. Default 0.2.
+    use_normalized_coordinates: Whether to assume boxes and keypoints are in
+      normalized coordinates (as opposed to absolute coordinates).
+      Default is True.

   Returns:
     4D image tensor of type uint8, with boxes drawn on top.
   """
+  # Additional channels are being ignored.
+  images = images[:, :, :, 0:3]
   visualization_keyword_args = {
-      'use_normalized_coordinates': True,
+      'use_normalized_coordinates': use_normalized_coordinates,
       'max_boxes_to_draw': max_boxes_to_draw,
       'min_score_thresh': min_score_thresh,
       'agnostic_mode': False,
@@ -382,7 +389,8 @@ def draw_bounding_boxes_on_image_tensors(images,
 def draw_side_by_side_evaluation_image(eval_dict,
                                        category_index,
                                        max_boxes_to_draw=20,
-                                       min_score_thresh=0.2):
+                                       min_score_thresh=0.2,
+                                       use_normalized_coordinates=True):
   """Creates a side-by-side image with detections and groundtruth.

   Bounding boxes (and instance masks, if available) are visualized on both
@@ -394,6 +402,9 @@ def draw_side_by_side_evaluation_image(eval_dict,
     category_index: A category index (dictionary) produced from a labelmap.
     max_boxes_to_draw: The maximum number of boxes to draw for detections.
     min_score_thresh: The minimum score threshold for showing detections.
+    use_normalized_coordinates: Whether to assume boxes and keypoints are in
+      normalized coordinates (as opposed to absolute coordinates).
+      Default is True.

   Returns:
     A [1, H, 2 * W, C] uint8 tensor. The subimage on the left corresponds to
@@ -425,7 +436,8 @@ def draw_side_by_side_evaluation_image(eval_dict,
       instance_masks=instance_masks,
       keypoints=keypoints,
       max_boxes_to_draw=max_boxes_to_draw,
-      min_score_thresh=min_score_thresh)
+      min_score_thresh=min_score_thresh,
+      use_normalized_coordinates=use_normalized_coordinates)
   images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
       eval_dict[input_data_fields.original_image],
       tf.expand_dims(eval_dict[input_data_fields.groundtruth_boxes], axis=0),
@@ -439,7 +451,8 @@ def draw_side_by_side_evaluation_image(eval_dict,
       instance_masks=groundtruth_instance_masks,
       keypoints=None,
       max_boxes_to_draw=None,
-      min_score_thresh=0.0)
+      min_score_thresh=0.0,
+      use_normalized_coordinates=use_normalized_coordinates)
   return tf.concat([images_with_detections, images_with_groundtruth], axis=2)
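A hedged sketch of the new keyword, assuming detection tensors built as in the tests below but with box coordinates in absolute pixels:

    from object_detection.utils import visualization_utils

    # images, boxes, classes, scores, category_index shaped as documented above.
    images_with_boxes = visualization_utils.draw_bounding_boxes_on_image_tensors(
        images, boxes, classes, scores, category_index,
        use_normalized_coordinates=False)  # boxes are absolute, not in [0, 1]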
research/object_detection/utils/visualization_utils_test.py

@@ -48,6 +48,9 @@ class VisualizationUtilsTest(tf.test.TestCase):
     image = np.concatenate((imu, imd), axis=0)
     return image

+  def create_test_image_with_five_channels(self):
+    return np.full([100, 200, 5], 255, dtype=np.uint8)
+
   def test_draw_bounding_box_on_image(self):
     test_image = self.create_colorful_test_image()
     test_image = Image.fromarray(test_image)
@@ -144,6 +147,32 @@ class VisualizationUtilsTest(tf.test.TestCase):
       image_pil = Image.fromarray(images_with_boxes_np[i, ...])
       image_pil.save(output_file)

+  def test_draw_bounding_boxes_on_image_tensors_with_additional_channels(self):
+    """Tests the case where input image tensor has more than 3 channels."""
+    category_index = {1: {'id': 1, 'name': 'dog'}}
+    image_np = self.create_test_image_with_five_channels()
+    images_np = np.stack((image_np, image_np), axis=0)
+
+    with tf.Graph().as_default():
+      images_tensor = tf.constant(value=images_np, dtype=tf.uint8)
+      boxes = tf.constant(0, dtype=tf.float32, shape=[2, 0, 4])
+      classes = tf.constant(0, dtype=tf.int64, shape=[2, 0])
+      scores = tf.constant(0, dtype=tf.float32, shape=[2, 0])
+      images_with_boxes = (
+          visualization_utils.draw_bounding_boxes_on_image_tensors(
+              images_tensor, boxes, classes, scores, category_index,
+              min_score_thresh=0.2))
+
+      with self.test_session() as sess:
+        sess.run(tf.global_variables_initializer())
+        final_images_np = sess.run(images_with_boxes)
+        self.assertEqual((2, 100, 200, 3), final_images_np.shape)
+
   def test_draw_keypoints_on_image(self):
     test_image = self.create_colorful_test_image()
     test_image = Image.fromarray(test_image)
research/object_detection/utils/vrd_evaluation.py
0 → 100644
View file @
97760186
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluator class for Visual Relations Detection.
VRDDetectionEvaluator is a class which manages ground truth information of a
visual relations detection (vrd) dataset, and computes frequently used detection
metrics such as Precision, Recall, Recall@k, of the provided vrd detection
results.
It supports the following operations:
1) Adding ground truth information of images sequentially.
2) Adding detection results of images sequentially.
3) Evaluating detection metrics on already inserted detection results.
Note1: groundtruth should be inserted before evaluation.
Note2: This module operates on numpy boxes and box lists.
"""
from
abc
import
abstractmethod
import
collections
import
logging
import
numpy
as
np
from
object_detection.core
import
standard_fields
from
object_detection.utils
import
metrics
from
object_detection.utils
import
object_detection_evaluation
from
object_detection.utils
import
per_image_vrd_evaluation
# Below standard input numpy datatypes are defined:
# box_data_type - datatype of the groundtruth visual relations box annotations;
# this datatype consists of two named boxes: subject bounding box and object
# bounding box. Each box is of the format [y_min, x_min, y_max, x_max], each
# coordinate being of type float32.
# label_data_type - corresponding datatype of the visual relations label
# annotaions; it consists of three numerical class labels: subject class label,
# object class label and relation class label, each class label being of type
# int32.
vrd_box_data_type
=
np
.
dtype
([(
'subject'
,
'f4'
,
(
4
,)),
(
'object'
,
'f4'
,
(
4
,))])
single_box_data_type
=
np
.
dtype
([(
'box'
,
'f4'
,
(
4
,))])
label_data_type
=
np
.
dtype
([(
'subject'
,
'i4'
),
(
'object'
,
'i4'
),
(
'relation'
,
'i4'
)])
class
VRDDetectionEvaluator
(
object_detection_evaluation
.
DetectionEvaluator
):
"""A class to evaluate VRD detections.
This class serves as a base class for VRD evaluation in two settings:
- phrase detection
- relation detection.
"""
def
__init__
(
self
,
matching_iou_threshold
=
0.5
,
metric_prefix
=
None
):
"""Constructor.
Args:
matching_iou_threshold: IOU threshold to use for matching groundtruth
boxes to detection boxes.
metric_prefix: (optional) string prefix for metric name; if None, no
prefix is used.
"""
super
(
VRDDetectionEvaluator
,
self
).
__init__
([])
self
.
_matching_iou_threshold
=
matching_iou_threshold
self
.
_evaluation
=
_VRDDetectionEvaluation
(
matching_iou_threshold
=
self
.
_matching_iou_threshold
)
self
.
_image_ids
=
set
([])
self
.
_metric_prefix
=
(
metric_prefix
+
'_'
)
if
metric_prefix
else
''
self
.
_evaluatable_labels
=
{}
self
.
_negative_labels
=
{}
@
abstractmethod
def
_process_groundtruth_boxes
(
self
,
groundtruth_box_tuples
):
"""Pre-processes boxes before adding them to the VRDDetectionEvaluation.
Phrase detection and Relation detection subclasses re-implement this method
depending on the task.
Args:
groundtruth_box_tuples: A numpy array of structures with the shape
[M, 1], each structure containing the same number of named bounding
boxes. Each box is of the format [y_min, x_min, y_max, x_max] (see
datatype vrd_box_data_type, single_box_data_type above).
"""
raise
NotImplementedError
(
'_process_groundtruth_boxes method should be implemented in subclasses'
'of VRDDetectionEvaluator.'
)
@
abstractmethod
def
_process_detection_boxes
(
self
,
detections_box_tuples
):
"""Pre-processes boxes before adding them to the VRDDetectionEvaluation.
Phrase detection and Relation detection subclasses re-implement this method
depending on the task.
Args:
detections_box_tuples: A numpy array of structures with the shape
[M, 1], each structure containing the same number of named bounding
boxes. Each box is of the format [y_min, x_min, y_max, x_max] (see
datatype vrd_box_data_type, single_box_data_type above).
"""
raise
NotImplementedError
(
'_process_detection_boxes method should be implemented in subclasses'
'of VRDDetectionEvaluator.'
)
def
add_single_ground_truth_image_info
(
self
,
image_id
,
groundtruth_dict
):
"""Adds groundtruth for a single image to be used for evaluation.
Args:
image_id: A unique string/integer identifier for the image.
groundtruth_dict: A dictionary containing -
standard_fields.InputDataFields.groundtruth_boxes: A numpy array
of structures with the shape [M, 1], representing M tuples, each tuple
containing the same number of named bounding boxes.
Each box is of the format [y_min, x_min, y_max, x_max] (see
datatype vrd_box_data_type, single_box_data_type above).
standard_fields.InputDataFields.groundtruth_classes: A numpy array of
structures shape [M, 1], representing the class labels of the
corresponding bounding boxes and possibly additional classes (see
datatype label_data_type above).
standard_fields.InputDataFields.verified_labels: numpy array
of shape [K] containing verified labels.
Raises:
ValueError: On adding groundtruth for an image more than once.
"""
if
image_id
in
self
.
_image_ids
:
raise
ValueError
(
'Image with id {} already added.'
.
format
(
image_id
))
groundtruth_class_tuples
=
(
groundtruth_dict
[
standard_fields
.
InputDataFields
.
groundtruth_classes
])
groundtruth_box_tuples
=
(
groundtruth_dict
[
standard_fields
.
InputDataFields
.
groundtruth_boxes
])
self
.
_evaluation
.
add_single_ground_truth_image_info
(
image_key
=
image_id
,
groundtruth_box_tuples
=
self
.
_process_groundtruth_boxes
(
groundtruth_box_tuples
),
groundtruth_class_tuples
=
groundtruth_class_tuples
)
self
.
_image_ids
.
update
([
image_id
])
all_classes
=
[]
for
field
in
groundtruth_box_tuples
.
dtype
.
fields
:
all_classes
.
append
(
groundtruth_class_tuples
[
field
])
groudtruth_positive_classes
=
np
.
unique
(
np
.
concatenate
(
all_classes
))
verified_labels
=
groundtruth_dict
.
get
(
standard_fields
.
InputDataFields
.
verified_labels
,
np
.
array
(
[],
dtype
=
int
))
self
.
_evaluatable_labels
[
image_id
]
=
np
.
unique
(
np
.
concatenate
((
verified_labels
,
groudtruth_positive_classes
)))
self
.
_negative_labels
[
image_id
]
=
np
.
setdiff1d
(
verified_labels
,
groudtruth_positive_classes
)
def
add_single_detected_image_info
(
self
,
image_id
,
detections_dict
):
"""Adds detections for a single image to be used for evaluation.
Args:
image_id: A unique string/integer identifier for the image.
detections_dict: A dictionary containing -
standard_fields.DetectionResultFields.detection_boxes: A numpy array of
structures with shape [N, 1], representing N tuples, each tuple
containing the same number of named bounding boxes.
Each box is of the format [y_min, x_min, y_max, x_max] (as an example
see datatype vrd_box_data_type, single_box_data_type above).
standard_fields.DetectionResultFields.detection_scores: float32 numpy
array of shape [N] containing detection scores for the boxes.
standard_fields.DetectionResultFields.detection_classes: A numpy array
of structures shape [N, 1], representing the class labels of the
corresponding bounding boxes and possibly additional classes (see
datatype label_data_type above).
"""
num_detections
=
detections_dict
[
standard_fields
.
DetectionResultFields
.
detection_boxes
].
shape
[
0
]
detection_class_tuples
=
detections_dict
[
standard_fields
.
DetectionResultFields
.
detection_classes
]
detection_box_tuples
=
detections_dict
[
standard_fields
.
DetectionResultFields
.
detection_boxes
]
selector
=
np
.
ones
(
num_detections
,
dtype
=
bool
)
# Only check boxable labels
for
field
in
detection_box_tuples
.
dtype
.
fields
:
# Verify if one of the labels is negative (this is sure FP)
selector
|=
np
.
isin
(
detection_class_tuples
[
field
],
self
.
_negative_labels
[
image_id
])
# Verify if all labels are verified
selector
|=
np
.
isin
(
detection_class_tuples
[
field
],
self
.
_evaluatable_labels
[
image_id
])
self
.
_evaluation
.
add_single_detected_image_info
(
image_key
=
image_id
,
detected_box_tuples
=
self
.
_process_detection_boxes
(
detection_box_tuples
[
selector
]),
detected_scores
=
detections_dict
[
standard_fields
.
DetectionResultFields
.
detection_scores
][
selector
],
detected_class_tuples
=
detection_class_tuples
[
selector
])
def
evaluate
(
self
,
relationships
=
None
):
"""Compute evaluation result.
Args:
relationships: A dictionary of numerical label-text label mapping; if
specified, returns per-relationship AP.
Returns:
A dictionary of metrics with the following fields -
summary_metrics:
'weightedAP@<matching_iou_threshold>IOU' : weighted average precision
at the specified IOU threshold.
'AP@<matching_iou_threshold>IOU/<relationship>' : AP per relationship.
'mAP@<matching_iou_threshold>IOU': mean average precision at the
specified IOU threshold.
'Recall@50@<matching_iou_threshold>IOU': recall@50 at the specified IOU
threshold.
'Recall@100@<matching_iou_threshold>IOU': recall@100 at the specified
IOU threshold.
if relationships is specified, returns <relationship> in AP metrics as
readable names, otherwise the names correspond to class numbers.
"""
(
weighted_average_precision
,
mean_average_precision
,
average_precisions
,
_
,
_
,
recall_50
,
recall_100
,
_
,
_
)
=
(
self
.
_evaluation
.
evaluate
())
vrd_metrics
=
{
(
self
.
_metric_prefix
+
'weightedAP@{}IOU'
.
format
(
self
.
_matching_iou_threshold
)):
weighted_average_precision
,
self
.
_metric_prefix
+
'mAP@{}IOU'
.
format
(
self
.
_matching_iou_threshold
):
mean_average_precision
,
self
.
_metric_prefix
+
'Recall@50@{}IOU'
.
format
(
self
.
_matching_iou_threshold
):
recall_50
,
self
.
_metric_prefix
+
'Recall@100@{}IOU'
.
format
(
self
.
_matching_iou_threshold
):
recall_100
,
}
if
relationships
:
for
key
,
average_precision
in
average_precisions
.
iteritems
():
vrd_metrics
[
self
.
_metric_prefix
+
'AP@{}IOU/{}'
.
format
(
self
.
_matching_iou_threshold
,
relationships
[
key
])]
=
average_precision
else
:
for
key
,
average_precision
in
average_precisions
.
iteritems
():
vrd_metrics
[
self
.
_metric_prefix
+
'AP@{}IOU/{}'
.
format
(
self
.
_matching_iou_threshold
,
key
)]
=
average_precision
return
vrd_metrics
def
clear
(
self
):
"""Clears the state to prepare for a fresh evaluation."""
self
.
_evaluation
=
_VRDDetectionEvaluation
(
matching_iou_threshold
=
self
.
_matching_iou_threshold
)
self
.
_image_ids
.
clear
()
self
.
_negative_labels
.
clear
()
self
.
_evaluatable_labels
.
clear
()
class VRDRelationDetectionEvaluator(VRDDetectionEvaluator):
  """A class to evaluate VRD detections in the relation setting.

  Expected groundtruth box datatype is vrd_box_data_type, expected groundtruth
  labels datatype is label_data_type.
  Expected detection box datatype is vrd_box_data_type, expected detection
  labels datatype is label_data_type.
  """

  def __init__(self, matching_iou_threshold=0.5):
    super(VRDRelationDetectionEvaluator, self).__init__(
        matching_iou_threshold=matching_iou_threshold,
        metric_prefix='VRDMetric_Relationships')

  def _process_groundtruth_boxes(self, groundtruth_box_tuples):
    """Pre-processes boxes before adding them to the VRDDetectionEvaluation.

    Args:
      groundtruth_box_tuples: A numpy array of structures with the shape
        [M, 1], each structure containing the same number of named bounding
        boxes. Each box is of the format [y_min, x_min, y_max, x_max].

    Returns:
      Unchanged input.
    """
    return groundtruth_box_tuples

  def _process_detection_boxes(self, detections_box_tuples):
    """Pre-processes boxes before adding them to the VRDDetectionEvaluation.

    Phrase detection and Relation detection subclasses re-implement this
    method depending on the task.

    Args:
      detections_box_tuples: A numpy array of structures with the shape
        [M, 1], each structure containing the same number of named bounding
        boxes. Each box is of the format [y_min, x_min, y_max, x_max] (see
        datatypes vrd_box_data_type and single_box_data_type above).

    Returns:
      Unchanged input.
    """
    return detections_box_tuples
class VRDPhraseDetectionEvaluator(VRDDetectionEvaluator):
  """A class to evaluate VRD detections in the phrase setting.

  Expected groundtruth box datatype is vrd_box_data_type, expected groundtruth
  labels datatype is label_data_type.
  Expected detection box datatype is single_box_data_type, expected detection
  labels datatype is label_data_type.
  """

  def __init__(self, matching_iou_threshold=0.5):
    super(VRDPhraseDetectionEvaluator, self).__init__(
        matching_iou_threshold=matching_iou_threshold,
        metric_prefix='VRDMetric_Phrases')
  def _process_groundtruth_boxes(self, groundtruth_box_tuples):
    """Pre-processes boxes before adding them to the VRDDetectionEvaluation.

    In the phrase evaluation task, evaluation expects exactly one bounding
    box containing all objects in the phrase. This bounding box is computed
    as an enclosing box of all groundtruth boxes of a phrase.

    Args:
      groundtruth_box_tuples: A numpy array of structures with the shape
        [M, 1], each structure containing the same number of named bounding
        boxes. Each box is of the format [y_min, x_min, y_max, x_max]. See
        vrd_box_data_type for an example of the structure.

    Returns:
      result: A numpy array of structures with the shape [M, 1], each
        structure containing exactly one named bounding box. i-th output
        structure corresponds to the result of processing i-th input
        structure, where the named bounding box is computed as an enclosing
        bounding box of all bounding boxes of the i-th input structure.
    """
    first_box_key = next(iter(groundtruth_box_tuples.dtype.fields))
    miny = groundtruth_box_tuples[first_box_key][:, 0]
    minx = groundtruth_box_tuples[first_box_key][:, 1]
    maxy = groundtruth_box_tuples[first_box_key][:, 2]
    maxx = groundtruth_box_tuples[first_box_key][:, 3]
    # Take the elementwise min/max of corners across all named boxes to get
    # the enclosing box of each tuple.
    for field in groundtruth_box_tuples.dtype.fields:
      miny = np.minimum(groundtruth_box_tuples[field][:, 0], miny)
      minx = np.minimum(groundtruth_box_tuples[field][:, 1], minx)
      maxy = np.maximum(groundtruth_box_tuples[field][:, 2], maxy)
      maxx = np.maximum(groundtruth_box_tuples[field][:, 3], maxx)

    data_result = []
    for i in range(groundtruth_box_tuples.shape[0]):
      data_result.append(([miny[i], minx[i], maxy[i], maxx[i]],))
    result = np.array(data_result, dtype=[('box', 'f4', (4,))])
    return result
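  # Worked example of the enclosing-box computation above (values made up):
  # a phrase tuple with subject box [0, 0, 1, 1] and object box [1, 2, 2, 3]
  # yields min-of-mins / max-of-maxes, i.e. the single box [0, 0, 2, 3]:
  #
  #   gt = np.array([([0, 0, 1, 1], [1, 2, 2, 3])], dtype=vrd_box_data_type)
  #   self._process_groundtruth_boxes(gt)['box']  # -> [[0., 0., 2., 3.]]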
  def _process_detection_boxes(self, detections_box_tuples):
    """Pre-processes boxes before adding them to the VRDDetectionEvaluation.

    In the phrase evaluation task, evaluation expects exactly one bounding
    box containing all objects in the phrase. This bounding box is computed
    as an enclosing box of all detection boxes of a phrase.

    Args:
      detections_box_tuples: A numpy array of structures with the shape
        [M, 1], each structure containing the same number of named bounding
        boxes. Each box is of the format [y_min, x_min, y_max, x_max]. See
        vrd_box_data_type for an example of this structure.

    Returns:
      result: A numpy array of structures with the shape [M, 1], each
        structure containing exactly one named bounding box. i-th output
        structure corresponds to the result of processing i-th input
        structure, where the named bounding box is computed as an enclosing
        bounding box of all bounding boxes of the i-th input structure.
    """
    first_box_key = next(iter(detections_box_tuples.dtype.fields))
    miny = detections_box_tuples[first_box_key][:, 0]
    minx = detections_box_tuples[first_box_key][:, 1]
    maxy = detections_box_tuples[first_box_key][:, 2]
    maxx = detections_box_tuples[first_box_key][:, 3]
    for field in detections_box_tuples.dtype.fields:
      miny = np.minimum(detections_box_tuples[field][:, 0], miny)
      minx = np.minimum(detections_box_tuples[field][:, 1], minx)
      maxy = np.maximum(detections_box_tuples[field][:, 2], maxy)
      maxx = np.maximum(detections_box_tuples[field][:, 3], maxx)

    data_result = []
    for i in range(detections_box_tuples.shape[0]):
      data_result.append(([miny[i], minx[i], maxy[i], maxx[i]],))
    result = np.array(data_result, dtype=[('box', 'f4', (4,))])
    return result
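  # Note that _process_groundtruth_boxes and _process_detection_boxes above
  # share the same enclosing-box logic; a vectorized shared helper could look
  # like the following sketch (hypothetical, not part of this file):
  #
  #   def _enclosing_boxes(box_tuples):
  #     field_names = list(box_tuples.dtype.fields)
  #     stacked = np.stack([box_tuples[f] for f in field_names])  # [F, M, 4]
  #     boxes = np.stack([stacked[..., 0].min(0), stacked[..., 1].min(0),
  #                       stacked[..., 2].max(0), stacked[..., 3].max(0)],
  #                      axis=-1)                                 # [M, 4]
  #     return np.array([(b,) for b in boxes], dtype=[('box', 'f4', (4,))])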
VRDDetectionEvalMetrics = collections.namedtuple('VRDDetectionEvalMetrics', [
    'weighted_average_precision', 'mean_average_precision',
    'average_precisions', 'precisions', 'recalls', 'recall_50', 'recall_100',
    'median_rank_50', 'median_rank_100'
])
class _VRDDetectionEvaluation(object):
  """Performs metric computation for the VRD task. This class is internal."""

  def __init__(self, matching_iou_threshold=0.5):
    """Constructor.

    Args:
      matching_iou_threshold: IOU threshold to use for matching groundtruth
        boxes to detection boxes.
    """
    self._per_image_eval = per_image_vrd_evaluation.PerImageVRDEvaluation(
        matching_iou_threshold=matching_iou_threshold)

    self._groundtruth_box_tuples = {}
    self._groundtruth_class_tuples = {}
    self._num_gt_instances = 0
    self._num_gt_imgs = 0
    self._num_gt_instances_per_relationship = {}

    self.clear_detections()
  def clear_detections(self):
    """Clears detections."""
    self._detection_keys = set()
    self._scores = []
    self._relation_field_values = []
    self._tp_fp_labels = []
    self._average_precisions = {}
    self._precisions = []
    self._recalls = []
  def add_single_ground_truth_image_info(
      self, image_key, groundtruth_box_tuples, groundtruth_class_tuples):
    """Adds groundtruth for a single image to be used for evaluation.

    Args:
      image_key: A unique string/integer identifier for the image.
      groundtruth_box_tuples: A numpy array of structures with the shape
        [M, 1], representing M tuples, each tuple containing the same number
        of named bounding boxes.
        Each box is of the format [y_min, x_min, y_max, x_max].
      groundtruth_class_tuples: A numpy array of structures shape [M, 1],
        representing the class labels of the corresponding bounding boxes and
        possibly additional classes.
    """
    if image_key in self._groundtruth_box_tuples:
      logging.warning(
          'image %s has already been added to the ground truth database.',
          image_key)
      return

    self._groundtruth_box_tuples[image_key] = groundtruth_box_tuples
    self._groundtruth_class_tuples[image_key] = groundtruth_class_tuples

    self._update_groundtruth_statistics(groundtruth_class_tuples)
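  # Sketch of the expected inputs (using the structured dtypes declared at the
  # top of this file; values made up): one entry per relationship tuple.
  #
  #   gt_boxes = np.array([([0, 0, 1, 1], [1, 1, 2, 2])],  # subject, object
  #                       dtype=vrd_box_data_type)
  #   gt_classes = np.array([(1, 2, 3)],  # subject, object, relation labels
  #                         dtype=label_data_type)
  #   evaluation.add_single_ground_truth_image_info('img1', gt_boxes,
  #                                                 gt_classes)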
  def add_single_detected_image_info(self, image_key, detected_box_tuples,
                                     detected_scores, detected_class_tuples):
    """Adds detections for a single image to be used for evaluation.

    Args:
      image_key: A unique string/integer identifier for the image.
      detected_box_tuples: A numpy array of structures with shape [N, 1],
        representing N tuples, each tuple containing the same number of named
        bounding boxes.
        Each box is of the format [y_min, x_min, y_max, x_max].
      detected_scores: A float numpy array of shape [N], representing the
        confidence scores of the detected N object instances.
      detected_class_tuples: A numpy array of structures shape [N, 1],
        representing the class labels of the corresponding bounding boxes and
        possibly additional classes.
    """
    self._detection_keys.add(image_key)
    if image_key in self._groundtruth_box_tuples:
      groundtruth_box_tuples = self._groundtruth_box_tuples[image_key]
      groundtruth_class_tuples = self._groundtruth_class_tuples[image_key]
    else:
      groundtruth_box_tuples = np.empty(shape=[0, 4], dtype=float)
      groundtruth_class_tuples = np.array([], dtype=int)

    scores, tp_fp_labels, mapping = (
        self._per_image_eval.compute_detection_tp_fp(
            detected_box_tuples=detected_box_tuples,
            detected_scores=detected_scores,
            detected_class_tuples=detected_class_tuples,
            groundtruth_box_tuples=groundtruth_box_tuples,
            groundtruth_class_tuples=groundtruth_class_tuples))

    self._scores += [scores]
    self._tp_fp_labels += [tp_fp_labels]
    # `mapping` reorders the detections to match the returned scores; keep the
    # relation label of each scored detection for per-relation metrics.
    self._relation_field_values += [detected_class_tuples[mapping]['relation']]
  def _update_groundtruth_statistics(self, groundtruth_class_tuples):
    """Updates ground truth statistics.

    Args:
      groundtruth_class_tuples: A numpy array of structures shape [M, 1],
        representing the class labels of the corresponding bounding boxes and
        possibly additional classes.
    """
    self._num_gt_instances += groundtruth_class_tuples.shape[0]
    self._num_gt_imgs += 1
    for relation_field_value in np.unique(
        groundtruth_class_tuples['relation']):
      if relation_field_value not in self._num_gt_instances_per_relationship:
        self._num_gt_instances_per_relationship[relation_field_value] = 0
      self._num_gt_instances_per_relationship[relation_field_value] += np.sum(
          groundtruth_class_tuples['relation'] == relation_field_value)
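  # The loop above is equivalent to a single np.unique call with counts
  # (a sketch of the same bookkeeping):
  #
  #   values, counts = np.unique(groundtruth_class_tuples['relation'],
  #                              return_counts=True)
  #   for value, count in zip(values, counts):
  #     self._num_gt_instances_per_relationship[value] = (
  #         self._num_gt_instances_per_relationship.get(value, 0) + count)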
  def evaluate(self):
    """Computes evaluation result.

    Returns:
      A named tuple with the following fields -
        average_precision: a float number corresponding to average precision.
        precisions: an array of precisions.
        recalls: an array of recalls.
        recall@50: recall computed on 50 top-scoring samples.
        recall@100: recall computed on 100 top-scoring samples.
        median_rank@50: median rank computed on 50 top-scoring samples.
        median_rank@100: median rank computed on 100 top-scoring samples.
    """
    if self._num_gt_instances == 0:
      logging.warning('No ground truth instances')

    if not self._scores:
      scores = np.array([], dtype=float)
      tp_fp_labels = np.array([], dtype=bool)
      relation_field_values = np.array([], dtype=int)
    else:
      scores = np.concatenate(self._scores)
      tp_fp_labels = np.concatenate(self._tp_fp_labels)
      relation_field_values = np.concatenate(self._relation_field_values)

    for relation_field_value in self._num_gt_instances_per_relationship:
      precisions, recalls = metrics.compute_precision_recall(
          scores[relation_field_values == relation_field_value],
          tp_fp_labels[relation_field_values == relation_field_value],
          self._num_gt_instances_per_relationship[relation_field_value])
      self._average_precisions[
          relation_field_value] = metrics.compute_average_precision(
              precisions, recalls)

    self._mean_average_precision = np.mean(
        list(self._average_precisions.values()))

    self._precisions, self._recalls = metrics.compute_precision_recall(
        scores, tp_fp_labels, self._num_gt_instances)
    self._weighted_average_precision = metrics.compute_average_precision(
        self._precisions, self._recalls)

    # Recall@k and median rank@k are computed over the per-image tp/fp lists.
    self._recall_50 = metrics.compute_recall_at_k(
        self._tp_fp_labels, self._num_gt_instances, 50)
    self._median_rank_50 = metrics.compute_median_rank_at_k(
        self._tp_fp_labels, 50)
    self._recall_100 = metrics.compute_recall_at_k(
        self._tp_fp_labels, self._num_gt_instances, 100)
    self._median_rank_100 = metrics.compute_median_rank_at_k(
        self._tp_fp_labels, 100)

    return VRDDetectionEvalMetrics(
        self._weighted_average_precision, self._mean_average_precision,
        self._average_precisions, self._precisions, self._recalls,
        self._recall_50, self._recall_100, self._median_rank_50,
        self._median_rank_100)
research/object_detection/utils/vrd_evaluation_test.py
0 → 100644
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.object_detection.utils.vrd_evaluation."""
import numpy as np
import tensorflow as tf

from object_detection.core import standard_fields
from object_detection.utils import vrd_evaluation
class VRDRelationDetectionEvaluatorTest(tf.test.TestCase):

  def test_vrdrelation_evaluator(self):
    self.vrd_eval = vrd_evaluation.VRDRelationDetectionEvaluator()

    image_key1 = 'img1'
    groundtruth_box_tuples1 = np.array(
        [([0, 0, 1, 1], [1, 1, 2, 2]), ([0, 0, 1, 1], [1, 2, 2, 3])],
        dtype=vrd_evaluation.vrd_box_data_type)
    groundtruth_class_tuples1 = np.array(
        [(1, 2, 3), (1, 4, 3)], dtype=vrd_evaluation.label_data_type)
    groundtruth_verified_labels1 = np.array([1, 2, 3, 4, 5], dtype=int)
    self.vrd_eval.add_single_ground_truth_image_info(
        image_key1, {
            standard_fields.InputDataFields.groundtruth_boxes:
                groundtruth_box_tuples1,
            standard_fields.InputDataFields.groundtruth_classes:
                groundtruth_class_tuples1,
            standard_fields.InputDataFields.verified_labels:
                groundtruth_verified_labels1,
        })

    image_key2 = 'img2'
    groundtruth_box_tuples2 = np.array(
        [([0, 0, 1, 1], [1, 1, 2, 2])], dtype=vrd_evaluation.vrd_box_data_type)
    groundtruth_class_tuples2 = np.array(
        [(1, 4, 3)], dtype=vrd_evaluation.label_data_type)
    self.vrd_eval.add_single_ground_truth_image_info(
        image_key2, {
            standard_fields.InputDataFields.groundtruth_boxes:
                groundtruth_box_tuples2,
            standard_fields.InputDataFields.groundtruth_classes:
                groundtruth_class_tuples2,
        })

    image_key3 = 'img3'
    groundtruth_box_tuples3 = np.array(
        [([0, 0, 1, 1], [1, 1, 2, 2])], dtype=vrd_evaluation.vrd_box_data_type)
    groundtruth_class_tuples3 = np.array(
        [(1, 2, 4)], dtype=vrd_evaluation.label_data_type)
    self.vrd_eval.add_single_ground_truth_image_info(
        image_key3, {
            standard_fields.InputDataFields.groundtruth_boxes:
                groundtruth_box_tuples3,
            standard_fields.InputDataFields.groundtruth_classes:
                groundtruth_class_tuples3,
        })

    image_key = 'img1'
    detected_box_tuples = np.array(
        [([0, 0.3, 1, 1], [1.1, 1, 2, 2]), ([0, 0, 1, 1], [1, 1, 2, 2])],
        dtype=vrd_evaluation.vrd_box_data_type)
    detected_class_tuples = np.array(
        [(1, 2, 5), (1, 2, 3)], dtype=vrd_evaluation.label_data_type)
    detected_scores = np.array([0.7, 0.8], dtype=float)
    self.vrd_eval.add_single_detected_image_info(
        image_key, {
            standard_fields.DetectionResultFields.detection_boxes:
                detected_box_tuples,
            standard_fields.DetectionResultFields.detection_scores:
                detected_scores,
            standard_fields.DetectionResultFields.detection_classes:
                detected_class_tuples,
        })

    metrics = self.vrd_eval.evaluate()

    self.assertAlmostEqual(
        metrics['VRDMetric_Relationships_weightedAP@0.5IOU'], 0.25)
    self.assertAlmostEqual(
        metrics['VRDMetric_Relationships_mAP@0.5IOU'], 0.1666666666666666)
    self.assertAlmostEqual(
        metrics['VRDMetric_Relationships_AP@0.5IOU/3'], 0.3333333333333333)
    self.assertAlmostEqual(metrics['VRDMetric_Relationships_AP@0.5IOU/4'], 0)
    self.assertAlmostEqual(
        metrics['VRDMetric_Relationships_Recall@50@0.5IOU'], 0.25)
    self.assertAlmostEqual(
        metrics['VRDMetric_Relationships_Recall@100@0.5IOU'], 0.25)
    self.vrd_eval.clear()
    self.assertFalse(self.vrd_eval._image_ids)
class VRDPhraseDetectionEvaluatorTest(tf.test.TestCase):

  def test_vrdphrase_evaluator(self):
    self.vrd_eval = vrd_evaluation.VRDPhraseDetectionEvaluator()

    image_key1 = 'img1'
    groundtruth_box_tuples1 = np.array(
        [([0, 0, 1, 1], [1, 1, 2, 2]), ([0, 0, 1, 1], [1, 2, 2, 3])],
        dtype=vrd_evaluation.vrd_box_data_type)
    groundtruth_class_tuples1 = np.array(
        [(1, 2, 3), (1, 4, 3)], dtype=vrd_evaluation.label_data_type)
    groundtruth_verified_labels1 = np.array([1, 2, 3, 4, 5], dtype=int)
    self.vrd_eval.add_single_ground_truth_image_info(
        image_key1, {
            standard_fields.InputDataFields.groundtruth_boxes:
                groundtruth_box_tuples1,
            standard_fields.InputDataFields.groundtruth_classes:
                groundtruth_class_tuples1,
            standard_fields.InputDataFields.verified_labels:
                groundtruth_verified_labels1,
        })

    image_key2 = 'img2'
    groundtruth_box_tuples2 = np.array(
        [([0, 0, 1, 1], [1, 1, 2, 2])], dtype=vrd_evaluation.vrd_box_data_type)
    groundtruth_class_tuples2 = np.array(
        [(1, 4, 3)], dtype=vrd_evaluation.label_data_type)
    self.vrd_eval.add_single_ground_truth_image_info(
        image_key2, {
            standard_fields.InputDataFields.groundtruth_boxes:
                groundtruth_box_tuples2,
            standard_fields.InputDataFields.groundtruth_classes:
                groundtruth_class_tuples2,
        })

    image_key3 = 'img3'
    groundtruth_box_tuples3 = np.array(
        [([0, 0, 1, 1], [1, 1, 2, 2])], dtype=vrd_evaluation.vrd_box_data_type)
    groundtruth_class_tuples3 = np.array(
        [(1, 2, 4)], dtype=vrd_evaluation.label_data_type)
    self.vrd_eval.add_single_ground_truth_image_info(
        image_key3, {
            standard_fields.InputDataFields.groundtruth_boxes:
                groundtruth_box_tuples3,
            standard_fields.InputDataFields.groundtruth_classes:
                groundtruth_class_tuples3,
        })

    image_key = 'img1'
    detected_box_tuples = np.array(
        [([0, 0.3, 0.5, 0.5], [0.3, 0.3, 1.0, 1.0]),
         ([0, 0, 1.2, 1.2], [0.0, 0.0, 2.0, 2.0])],
        dtype=vrd_evaluation.vrd_box_data_type)
    detected_class_tuples = np.array(
        [(1, 2, 5), (1, 2, 3)], dtype=vrd_evaluation.label_data_type)
    detected_scores = np.array([0.7, 0.8], dtype=float)
    self.vrd_eval.add_single_detected_image_info(
        image_key, {
            standard_fields.DetectionResultFields.detection_boxes:
                detected_box_tuples,
            standard_fields.DetectionResultFields.detection_scores:
                detected_scores,
            standard_fields.DetectionResultFields.detection_classes:
                detected_class_tuples,
        })

    metrics = self.vrd_eval.evaluate()

    self.assertAlmostEqual(metrics['VRDMetric_Phrases_weightedAP@0.5IOU'],
                           0.25)
    self.assertAlmostEqual(metrics['VRDMetric_Phrases_mAP@0.5IOU'],
                           0.1666666666666666)
    self.assertAlmostEqual(metrics['VRDMetric_Phrases_AP@0.5IOU/3'],
                           0.3333333333333333)
    self.assertAlmostEqual(metrics['VRDMetric_Phrases_AP@0.5IOU/4'], 0)
    self.assertAlmostEqual(metrics['VRDMetric_Phrases_Recall@50@0.5IOU'], 0.25)
    self.assertAlmostEqual(metrics['VRDMetric_Phrases_Recall@100@0.5IOU'],
                           0.25)
    self.vrd_eval.clear()
    self.assertFalse(self.vrd_eval._image_ids)
class VRDDetectionEvaluationTest(tf.test.TestCase):

  def setUp(self):
    """Initializes the evaluation and adds groundtruth and detections."""
    self.vrd_eval = vrd_evaluation._VRDDetectionEvaluation(
        matching_iou_threshold=0.5)

    image_key1 = 'img1'
    groundtruth_box_tuples1 = np.array(
        [([0, 0, 1, 1], [1, 1, 2, 2]), ([0, 0, 1, 1], [1, 2, 2, 3])],
        dtype=vrd_evaluation.vrd_box_data_type)
    groundtruth_class_tuples1 = np.array(
        [(1, 2, 3), (1, 4, 3)], dtype=vrd_evaluation.label_data_type)
    self.vrd_eval.add_single_ground_truth_image_info(
        image_key1, groundtruth_box_tuples1, groundtruth_class_tuples1)

    image_key2 = 'img2'
    groundtruth_box_tuples2 = np.array(
        [([0, 0, 1, 1], [1, 1, 2, 2])], dtype=vrd_evaluation.vrd_box_data_type)
    groundtruth_class_tuples2 = np.array(
        [(1, 4, 3)], dtype=vrd_evaluation.label_data_type)
    self.vrd_eval.add_single_ground_truth_image_info(
        image_key2, groundtruth_box_tuples2, groundtruth_class_tuples2)

    image_key3 = 'img3'
    groundtruth_box_tuples3 = np.array(
        [([0, 0, 1, 1], [1, 1, 2, 2])], dtype=vrd_evaluation.vrd_box_data_type)
    groundtruth_class_tuples3 = np.array(
        [(1, 2, 4)], dtype=vrd_evaluation.label_data_type)
    self.vrd_eval.add_single_ground_truth_image_info(
        image_key3, groundtruth_box_tuples3, groundtruth_class_tuples3)

    image_key = 'img1'
    detected_box_tuples = np.array(
        [([0, 0.3, 1, 1], [1.1, 1, 2, 2]), ([0, 0, 1, 1], [1, 1, 2, 2])],
        dtype=vrd_evaluation.vrd_box_data_type)
    detected_class_tuples = np.array(
        [(1, 2, 3), (1, 2, 3)], dtype=vrd_evaluation.label_data_type)
    detected_scores = np.array([0.7, 0.8], dtype=float)
    self.vrd_eval.add_single_detected_image_info(
        image_key, detected_box_tuples, detected_scores, detected_class_tuples)

  def test_evaluate(self):
    metrics = self.vrd_eval.evaluate()

    expected_weighted_average_precision = 0.25
    expected_mean_average_precision = 0.16666666666666
    expected_precision = np.array([1., 0.5], dtype=float)
    expected_recall = np.array([0.25, 0.25], dtype=float)
    expected_recall_50 = 0.25
    expected_recall_100 = 0.25
    expected_median_rank_50 = 0
    expected_median_rank_100 = 0

    self.assertAlmostEqual(expected_weighted_average_precision,
                           metrics.weighted_average_precision)
    self.assertAlmostEqual(expected_mean_average_precision,
                           metrics.mean_average_precision)
    self.assertAllClose(expected_precision, metrics.precisions)
    self.assertAllClose(expected_recall, metrics.recalls)
    self.assertAlmostEqual(expected_recall_50, metrics.recall_50)
    self.assertAlmostEqual(expected_recall_100, metrics.recall_100)
    self.assertAlmostEqual(expected_median_rank_50, metrics.median_rank_50)
    self.assertAlmostEqual(expected_median_rank_100, metrics.median_rank_100)
if __name__ == '__main__':
  tf.test.main()