Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
fd7b6887
Unverified
Commit
fd7b6887
authored
Feb 09, 2018
by
Jonathan Huang
Committed by
GitHub
Feb 09, 2018
Browse files
Merge pull request #3293 from pkulzc/master
Internal changes of object_detection
parents
f98ec55e
1efe98bb
Changes
200
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1349 additions
and
448 deletions
+1349
-448
research/object_detection/builders/losses_builder.py
research/object_detection/builders/losses_builder.py
+8
-18
research/object_detection/builders/losses_builder_test.py
research/object_detection/builders/losses_builder_test.py
+1
-2
research/object_detection/builders/matcher_builder.py
research/object_detection/builders/matcher_builder.py
+4
-2
research/object_detection/builders/matcher_builder_test.py
research/object_detection/builders/matcher_builder_test.py
+2
-0
research/object_detection/builders/model_builder.py
research/object_detection/builders/model_builder.py
+24
-11
research/object_detection/builders/model_builder_test.py
research/object_detection/builders/model_builder_test.py
+167
-3
research/object_detection/builders/optimizer_builder.py
research/object_detection/builders/optimizer_builder.py
+15
-12
research/object_detection/builders/optimizer_builder_test.py
research/object_detection/builders/optimizer_builder_test.py
+15
-25
research/object_detection/builders/preprocessor_builder.py
research/object_detection/builders/preprocessor_builder.py
+1
-0
research/object_detection/builders/preprocessor_builder_test.py
...ch/object_detection/builders/preprocessor_builder_test.py
+10
-0
research/object_detection/core/BUILD
research/object_detection/core/BUILD
+34
-18
research/object_detection/core/__init__.py
research/object_detection/core/__init__.py
+1
-0
research/object_detection/core/anchor_generator.py
research/object_detection/core/anchor_generator.py
+3
-2
research/object_detection/core/box_list_ops.py
research/object_detection/core/box_list_ops.py
+78
-1
research/object_detection/core/box_list_ops_test.py
research/object_detection/core/box_list_ops_test.py
+72
-0
research/object_detection/core/box_predictor.py
research/object_detection/core/box_predictor.py
+464
-135
research/object_detection/core/box_predictor_test.py
research/object_detection/core/box_predictor_test.py
+305
-63
research/object_detection/core/losses.py
research/object_detection/core/losses.py
+30
-69
research/object_detection/core/losses_test.py
research/object_detection/core/losses_test.py
+65
-83
research/object_detection/core/matcher.py
research/object_detection/core/matcher.py
+50
-4
No files found.
research/object_detection/builders/losses_builder.py
View file @
fd7b6887
...
...
@@ -116,18 +116,17 @@ def build_faster_rcnn_classification_loss(loss_config):
loss_type
=
loss_config
.
WhichOneof
(
'classification_loss'
)
if
loss_type
==
'weighted_sigmoid'
:
config
=
loss_config
.
weighted_sigmoid
return
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
config
.
anchorwise_output
)
return
losses
.
WeightedSigmoidClassificationLoss
()
if
loss_type
==
'weighted_softmax'
:
config
=
loss_config
.
weighted_softmax
return
losses
.
WeightedSoftmaxClassificationLoss
(
anchorwise_output
=
config
.
anchorwise_output
)
logit_scale
=
config
.
logit_scale
)
# By default, Faster RCNN second stage classifier uses Softmax loss
# with anchor-wise outputs.
config
=
loss_config
.
weighted_softmax
return
losses
.
WeightedSoftmaxClassificationLoss
(
anchorwise_output
=
Tru
e
)
logit_scale
=
config
.
logit_scal
e
)
def
_build_localization_loss
(
loss_config
):
...
...
@@ -148,14 +147,10 @@ def _build_localization_loss(loss_config):
loss_type
=
loss_config
.
WhichOneof
(
'localization_loss'
)
if
loss_type
==
'weighted_l2'
:
config
=
loss_config
.
weighted_l2
return
losses
.
WeightedL2LocalizationLoss
(
anchorwise_output
=
config
.
anchorwise_output
)
return
losses
.
WeightedL2LocalizationLoss
()
if
loss_type
==
'weighted_smooth_l1'
:
config
=
loss_config
.
weighted_smooth_l1
return
losses
.
WeightedSmoothL1LocalizationLoss
(
anchorwise_output
=
config
.
anchorwise_output
)
return
losses
.
WeightedSmoothL1LocalizationLoss
()
if
loss_type
==
'weighted_iou'
:
return
losses
.
WeightedIOULocalizationLoss
()
...
...
@@ -181,9 +176,7 @@ def _build_classification_loss(loss_config):
loss_type
=
loss_config
.
WhichOneof
(
'classification_loss'
)
if
loss_type
==
'weighted_sigmoid'
:
config
=
loss_config
.
weighted_sigmoid
return
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
config
.
anchorwise_output
)
return
losses
.
WeightedSigmoidClassificationLoss
()
if
loss_type
==
'weighted_sigmoid_focal'
:
config
=
loss_config
.
weighted_sigmoid_focal
...
...
@@ -191,21 +184,18 @@ def _build_classification_loss(loss_config):
if
config
.
HasField
(
'alpha'
):
alpha
=
config
.
alpha
return
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
config
.
anchorwise_output
,
gamma
=
config
.
gamma
,
alpha
=
alpha
)
if
loss_type
==
'weighted_softmax'
:
config
=
loss_config
.
weighted_softmax
return
losses
.
WeightedSoftmaxClassificationLoss
(
anchorwise_output
=
config
.
anchorwise_output
,
logit_scale
=
config
.
logit_scale
)
if
loss_type
==
'bootstrapped_sigmoid'
:
config
=
loss_config
.
bootstrapped_sigmoid
return
losses
.
BootstrappedSigmoidClassificationLoss
(
alpha
=
config
.
alpha
,
bootstrap_type
=
(
'hard'
if
config
.
hard_bootstrap
else
'soft'
),
anchorwise_output
=
config
.
anchorwise_output
)
bootstrap_type
=
(
'hard'
if
config
.
hard_bootstrap
else
'soft'
))
raise
ValueError
(
'Empty loss config.'
)
research/object_detection/builders/losses_builder_test.py
View file @
fd7b6887
...
...
@@ -80,7 +80,6 @@ class LocalizationLossBuilderTest(tf.test.TestCase):
losses_text_proto
=
"""
localization_loss {
weighted_smooth_l1 {
anchorwise_output: true
}
}
classification_loss {
...
...
@@ -245,7 +244,7 @@ class ClassificationLossBuilderTest(tf.test.TestCase):
targets
=
tf
.
constant
([[[
0.0
,
1.0
,
0.0
],
[
0.0
,
0.0
,
1.0
]]])
weights
=
tf
.
constant
([[
1.0
,
1.0
]])
loss
=
classification_loss
(
predictions
,
targets
,
weights
=
weights
)
self
.
assertEqual
(
loss
.
shape
,
[
1
,
2
])
self
.
assertEqual
(
loss
.
shape
,
[
1
,
2
,
3
])
def
test_raise_error_on_empty_config
(
self
):
losses_text_proto
=
"""
...
...
research/object_detection/builders/matcher_builder.py
View file @
fd7b6887
...
...
@@ -45,7 +45,9 @@ def build(matcher_config):
matched_threshold
=
matched_threshold
,
unmatched_threshold
=
unmatched_threshold
,
negatives_lower_than_unmatched
=
matcher
.
negatives_lower_than_unmatched
,
force_match_for_each_row
=
matcher
.
force_match_for_each_row
)
force_match_for_each_row
=
matcher
.
force_match_for_each_row
,
use_matmul_gather
=
matcher
.
use_matmul_gather
)
if
matcher_config
.
WhichOneof
(
'matcher_oneof'
)
==
'bipartite_matcher'
:
return
bipartite_matcher
.
GreedyBipartiteMatcher
()
matcher
=
matcher_config
.
bipartite_matcher
return
bipartite_matcher
.
GreedyBipartiteMatcher
(
matcher
.
use_matmul_gather
)
raise
ValueError
(
'Empty matcher.'
)
research/object_detection/builders/matcher_builder_test.py
View file @
fd7b6887
...
...
@@ -62,6 +62,7 @@ class MatcherBuilderTest(tf.test.TestCase):
unmatched_threshold: 0.3
negatives_lower_than_unmatched: false
force_match_for_each_row: true
use_matmul_gather: true
}
"""
matcher_proto
=
matcher_pb2
.
Matcher
()
...
...
@@ -72,6 +73,7 @@ class MatcherBuilderTest(tf.test.TestCase):
self
.
assertAlmostEqual
(
matcher_object
.
_unmatched_threshold
,
0.3
)
self
.
assertFalse
(
matcher_object
.
_negatives_lower_than_unmatched
)
self
.
assertTrue
(
matcher_object
.
_force_match_for_each_row
)
self
.
assertTrue
(
matcher_object
.
_use_matmul_gather
)
def
test_build_bipartite_matcher
(
self
):
matcher_text_proto
=
"""
...
...
research/object_detection/builders/model_builder.py
View file @
fd7b6887
...
...
@@ -31,6 +31,7 @@ from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extr
from
object_detection.models
import
faster_rcnn_inception_v2_feature_extractor
as
frcnn_inc_v2
from
object_detection.models
import
faster_rcnn_nas_feature_extractor
as
frcnn_nas
from
object_detection.models
import
faster_rcnn_resnet_v1_feature_extractor
as
frcnn_resnet_v1
from
object_detection.models
import
ssd_resnet_v1_fpn_feature_extractor
as
ssd_resnet_v1_fpn
from
object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor
import
EmbeddedSSDMobileNetV1FeatureExtractor
from
object_detection.models.ssd_inception_v2_feature_extractor
import
SSDInceptionV2FeatureExtractor
from
object_detection.models.ssd_inception_v3_feature_extractor
import
SSDInceptionV3FeatureExtractor
...
...
@@ -42,6 +43,9 @@ SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
'ssd_inception_v2'
:
SSDInceptionV2FeatureExtractor
,
'ssd_inception_v3'
:
SSDInceptionV3FeatureExtractor
,
'ssd_mobilenet_v1'
:
SSDMobileNetV1FeatureExtractor
,
'ssd_resnet50_v1_fpn'
:
ssd_resnet_v1_fpn
.
SSDResnet50V1FpnFeatureExtractor
,
'ssd_resnet101_v1_fpn'
:
ssd_resnet_v1_fpn
.
SSDResnet101V1FpnFeatureExtractor
,
'ssd_resnet152_v1_fpn'
:
ssd_resnet_v1_fpn
.
SSDResnet152V1FpnFeatureExtractor
,
'embedded_ssd_mobilenet_v1'
:
EmbeddedSSDMobileNetV1FeatureExtractor
,
}
...
...
@@ -62,13 +66,14 @@ FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
}
def
build
(
model_config
,
is_training
):
def
build
(
model_config
,
is_training
,
add_summaries
=
True
):
"""Builds a DetectionModel based on the model config.
Args:
model_config: A model.proto object containing the config for the desired
DetectionModel.
is_training: True if this model is being built for training purposes.
add_summaries: Whether to add tensorflow summaries in the model graph.
Returns:
DetectionModel based on the config.
...
...
@@ -80,9 +85,10 @@ def build(model_config, is_training):
raise
ValueError
(
'model_config not of type model_pb2.DetectionModel.'
)
meta_architecture
=
model_config
.
WhichOneof
(
'model'
)
if
meta_architecture
==
'ssd'
:
return
_build_ssd_model
(
model_config
.
ssd
,
is_training
)
return
_build_ssd_model
(
model_config
.
ssd
,
is_training
,
add_summaries
)
if
meta_architecture
==
'faster_rcnn'
:
return
_build_faster_rcnn_model
(
model_config
.
faster_rcnn
,
is_training
)
return
_build_faster_rcnn_model
(
model_config
.
faster_rcnn
,
is_training
,
add_summaries
)
raise
ValueError
(
'Unknown meta architecture: {}'
.
format
(
meta_architecture
))
...
...
@@ -106,6 +112,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
min_depth
=
feature_extractor_config
.
min_depth
pad_to_multiple
=
feature_extractor_config
.
pad_to_multiple
batch_norm_trainable
=
feature_extractor_config
.
batch_norm_trainable
use_explicit_padding
=
feature_extractor_config
.
use_explicit_padding
use_depthwise
=
feature_extractor_config
.
use_depthwise
conv_hyperparams
=
hyperparams_builder
.
build
(
feature_extractor_config
.
conv_hyperparams
,
is_training
)
...
...
@@ -115,16 +123,18 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
feature_extractor_class
=
SSD_FEATURE_EXTRACTOR_CLASS_MAP
[
feature_type
]
return
feature_extractor_class
(
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
conv_hyperparams
,
batch_norm_trainable
,
reuse_weights
)
batch_norm_trainable
,
reuse_weights
,
use_explicit_padding
,
use_depthwise
)
def
_build_ssd_model
(
ssd_config
,
is_training
):
def
_build_ssd_model
(
ssd_config
,
is_training
,
add_summaries
):
"""Builds an SSD detection model based on the model config.
Args:
ssd_config: A ssd.proto object containing the config for the desired
SSDMetaArch.
is_training: True if this model is being built for training purposes.
add_summaries: Whether to add tf summaries in the model.
Returns:
SSDMetaArch based on the config.
...
...
@@ -171,7 +181,8 @@ def _build_ssd_model(ssd_config, is_training):
classification_weight
,
localization_weight
,
normalize_loss_by_num_matches
,
hard_example_miner
)
hard_example_miner
,
add_summaries
=
add_summaries
)
def
_build_faster_rcnn_feature_extractor
(
...
...
@@ -205,7 +216,7 @@ def _build_faster_rcnn_feature_extractor(
batch_norm_trainable
,
reuse_weights
)
def
_build_faster_rcnn_model
(
frcnn_config
,
is_training
):
def
_build_faster_rcnn_model
(
frcnn_config
,
is_training
,
add_summaries
):
"""Builds a Faster R-CNN or R-FCN detection model based on the model config.
Builds R-FCN model if the second_stage_box_predictor in the config is of type
...
...
@@ -213,8 +224,9 @@ def _build_faster_rcnn_model(frcnn_config, is_training):
Args:
frcnn_config: A faster_rcnn.proto object containing the config for the
desired FasterRCNNMetaArch or RFCNMetaArch.
desired FasterRCNNMetaArch or RFCNMetaArch.
is_training: True if this model is being built for training purposes.
add_summaries: Whether to add tf summaries in the model.
Returns:
FasterRCNNMetaArch based on the config.
...
...
@@ -228,7 +240,7 @@ def _build_faster_rcnn_model(frcnn_config, is_training):
feature_extractor
=
_build_faster_rcnn_feature_extractor
(
frcnn_config
.
feature_extractor
,
is_training
)
first
_stage
_only
=
frcnn_config
.
first
_stage
_only
number_of
_stage
s
=
frcnn_config
.
number_of
_stage
s
first_stage_anchor_generator
=
anchor_generator_builder
.
build
(
frcnn_config
.
first_stage_anchor_generator
)
...
...
@@ -283,7 +295,7 @@ def _build_faster_rcnn_model(frcnn_config, is_training):
'num_classes'
:
num_classes
,
'image_resizer_fn'
:
image_resizer_fn
,
'feature_extractor'
:
feature_extractor
,
'
first_stage_only'
:
first
_stage
_only
,
'
number_of_stages'
:
number_of
_stage
s
,
'first_stage_anchor_generator'
:
first_stage_anchor_generator
,
'first_stage_atrous_rate'
:
first_stage_atrous_rate
,
'first_stage_box_predictor_arg_scope'
:
...
...
@@ -310,7 +322,8 @@ def _build_faster_rcnn_model(frcnn_config, is_training):
second_stage_classification_loss
,
'second_stage_classification_loss_weight'
:
second_stage_classification_loss_weight
,
'hard_example_miner'
:
hard_example_miner
}
'hard_example_miner'
:
hard_example_miner
,
'add_summaries'
:
add_summaries
}
if
isinstance
(
second_stage_box_predictor
,
box_predictor
.
RfcnBoxPredictor
):
return
rfcn_meta_arch
.
RFCNMetaArch
(
...
...
research/object_detection/builders/model_builder_test.py
View file @
fd7b6887
...
...
@@ -26,12 +26,14 @@ from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extr
from
object_detection.models
import
faster_rcnn_inception_v2_feature_extractor
as
frcnn_inc_v2
from
object_detection.models
import
faster_rcnn_nas_feature_extractor
as
frcnn_nas
from
object_detection.models
import
faster_rcnn_resnet_v1_feature_extractor
as
frcnn_resnet_v1
from
object_detection.models
import
ssd_resnet_v1_fpn_feature_extractor
as
ssd_resnet_v1_fpn
from
object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor
import
EmbeddedSSDMobileNetV1FeatureExtractor
from
object_detection.models.ssd_inception_v2_feature_extractor
import
SSDInceptionV2FeatureExtractor
from
object_detection.models.ssd_inception_v3_feature_extractor
import
SSDInceptionV3FeatureExtractor
from
object_detection.models.ssd_mobilenet_v1_feature_extractor
import
SSDMobileNetV1FeatureExtractor
from
object_detection.protos
import
model_pb2
F
EATURE_EXTRACTOR
_MAPS
=
{
F
RCNN_RESNET_FEAT
_MAPS
=
{
'faster_rcnn_resnet50'
:
frcnn_resnet_v1
.
FasterRCNNResnet50FeatureExtractor
,
'faster_rcnn_resnet101'
:
...
...
@@ -40,6 +42,15 @@ FEATURE_EXTRACTOR_MAPS = {
frcnn_resnet_v1
.
FasterRCNNResnet152FeatureExtractor
}
SSD_RESNET_V1_FPN_FEAT_MAPS
=
{
'ssd_resnet50_v1_fpn'
:
ssd_resnet_v1_fpn
.
SSDResnet50V1FpnFeatureExtractor
,
'ssd_resnet101_v1_fpn'
:
ssd_resnet_v1_fpn
.
SSDResnet101V1FpnFeatureExtractor
,
'ssd_resnet152_v1_fpn'
:
ssd_resnet_v1_fpn
.
SSDResnet152V1FpnFeatureExtractor
}
class
ModelBuilderTest
(
tf
.
test
.
TestCase
):
...
...
@@ -197,6 +208,87 @@ class ModelBuilderTest(tf.test.TestCase):
self
.
assertIsInstance
(
model
.
_feature_extractor
,
SSDInceptionV3FeatureExtractor
)
def
test_create_ssd_resnet_v1_fpn_model_from_config
(
self
):
model_text_proto
=
"""
ssd {
feature_extractor {
type: 'ssd_resnet50_v1_fpn'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
multiscale_anchor_generator {
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 32
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
num_layers_before_predictor: 1
}
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
}"""
model_proto
=
model_pb2
.
DetectionModel
()
text_format
.
Merge
(
model_text_proto
,
model_proto
)
for
extractor_type
,
extractor_class
in
SSD_RESNET_V1_FPN_FEAT_MAPS
.
items
():
model_proto
.
ssd
.
feature_extractor
.
type
=
extractor_type
model
=
model_builder
.
build
(
model_proto
,
is_training
=
True
)
self
.
assertIsInstance
(
model
,
ssd_meta_arch
.
SSDMetaArch
)
self
.
assertIsInstance
(
model
.
_feature_extractor
,
extractor_class
)
def
test_create_ssd_mobilenet_v1_model_from_config
(
self
):
model_text_proto
=
"""
ssd {
...
...
@@ -270,6 +362,78 @@ class ModelBuilderTest(tf.test.TestCase):
SSDMobileNetV1FeatureExtractor
)
self
.
assertTrue
(
model
.
_feature_extractor
.
_batch_norm_trainable
)
def
test_create_embedded_ssd_mobilenet_v1_model_from_config
(
self
):
model_text_proto
=
"""
ssd {
feature_extractor {
type: 'embedded_ssd_mobilenet_v1'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 256
width: 256
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
}
}"""
model_proto
=
model_pb2
.
DetectionModel
()
text_format
.
Merge
(
model_text_proto
,
model_proto
)
model
=
self
.
create_model
(
model_proto
)
self
.
assertIsInstance
(
model
,
ssd_meta_arch
.
SSDMetaArch
)
self
.
assertIsInstance
(
model
.
_feature_extractor
,
EmbeddedSSDMobileNetV1FeatureExtractor
)
def
test_create_faster_rcnn_resnet_v1_models_from_config
(
self
):
model_text_proto
=
"""
faster_rcnn {
...
...
@@ -331,7 +495,7 @@ class ModelBuilderTest(tf.test.TestCase):
}"""
model_proto
=
model_pb2
.
DetectionModel
()
text_format
.
Merge
(
model_text_proto
,
model_proto
)
for
extractor_type
,
extractor_class
in
F
EATURE_EXTRACTOR
_MAPS
.
items
():
for
extractor_type
,
extractor_class
in
F
RCNN_RESNET_FEAT
_MAPS
.
items
():
model_proto
.
faster_rcnn
.
feature_extractor
.
type
=
extractor_type
model
=
model_builder
.
build
(
model_proto
,
is_training
=
True
)
self
.
assertIsInstance
(
model
,
faster_rcnn_meta_arch
.
FasterRCNNMetaArch
)
...
...
@@ -730,7 +894,7 @@ class ModelBuilderTest(tf.test.TestCase):
}"""
model_proto
=
model_pb2
.
DetectionModel
()
text_format
.
Merge
(
model_text_proto
,
model_proto
)
for
extractor_type
,
extractor_class
in
F
EATURE_EXTRACTOR
_MAPS
.
items
():
for
extractor_type
,
extractor_class
in
F
RCNN_RESNET_FEAT
_MAPS
.
items
():
model_proto
.
faster_rcnn
.
feature_extractor
.
type
=
extractor_type
model
=
model_builder
.
build
(
model_proto
,
is_training
=
True
)
self
.
assertIsInstance
(
model
,
rfcn_meta_arch
.
RFCNMetaArch
)
...
...
research/object_detection/builders/optimizer_builder.py
View file @
fd7b6887
...
...
@@ -19,15 +19,14 @@ import tensorflow as tf
from
object_detection.utils
import
learning_schedules
def
build
(
optimizer_config
,
global_summaries
):
def
build
(
optimizer_config
):
"""Create optimizer based on config.
Args:
optimizer_config: A Optimizer proto message.
global_summaries: A set to attach learning rate summary to.
Returns:
An optimizer.
An optimizer
and a list of variables for summary
.
Raises:
ValueError: when using an unsupported input data type.
...
...
@@ -35,24 +34,30 @@ def build(optimizer_config, global_summaries):
optimizer_type
=
optimizer_config
.
WhichOneof
(
'optimizer'
)
optimizer
=
None
summary_vars
=
[]
if
optimizer_type
==
'rms_prop_optimizer'
:
config
=
optimizer_config
.
rms_prop_optimizer
learning_rate
=
_create_learning_rate
(
config
.
learning_rate
)
summary_vars
.
append
(
learning_rate
)
optimizer
=
tf
.
train
.
RMSPropOptimizer
(
_create_
learning_rate
(
config
.
learning_rate
,
global_summaries
)
,
learning_rate
,
decay
=
config
.
decay
,
momentum
=
config
.
momentum_optimizer_value
,
epsilon
=
config
.
epsilon
)
if
optimizer_type
==
'momentum_optimizer'
:
config
=
optimizer_config
.
momentum_optimizer
learning_rate
=
_create_learning_rate
(
config
.
learning_rate
)
summary_vars
.
append
(
learning_rate
)
optimizer
=
tf
.
train
.
MomentumOptimizer
(
_create_
learning_rate
(
config
.
learning_rate
,
global_summaries
)
,
learning_rate
,
momentum
=
config
.
momentum_optimizer_value
)
if
optimizer_type
==
'adam_optimizer'
:
config
=
optimizer_config
.
adam_optimizer
optimizer
=
tf
.
train
.
AdamOptimizer
(
_create_learning_rate
(
config
.
learning_rate
,
global_summaries
))
learning_rate
=
_create_learning_rate
(
config
.
learning_rate
)
summary_vars
.
append
(
learning_rate
)
optimizer
=
tf
.
train
.
AdamOptimizer
(
learning_rate
)
if
optimizer
is
None
:
raise
ValueError
(
'Optimizer %s not supported.'
%
optimizer_type
)
...
...
@@ -61,15 +66,14 @@ def build(optimizer_config, global_summaries):
optimizer
=
tf
.
contrib
.
opt
.
MovingAverageOptimizer
(
optimizer
,
average_decay
=
optimizer_config
.
moving_average_decay
)
return
optimizer
return
optimizer
,
summary_vars
def
_create_learning_rate
(
learning_rate_config
,
global_summaries
):
def
_create_learning_rate
(
learning_rate_config
):
"""Create optimizer learning rate based on config.
Args:
learning_rate_config: A LearningRate proto message.
global_summaries: A set to attach learning rate summary to.
Returns:
A learning rate.
...
...
@@ -81,7 +85,7 @@ def _create_learning_rate(learning_rate_config, global_summaries):
learning_rate_type
=
learning_rate_config
.
WhichOneof
(
'learning_rate'
)
if
learning_rate_type
==
'constant_learning_rate'
:
config
=
learning_rate_config
.
constant_learning_rate
learning_rate
=
config
.
learning_rate
learning_rate
=
tf
.
constant
(
config
.
learning_rate
,
dtype
=
tf
.
float32
)
if
learning_rate_type
==
'exponential_decay_learning_rate'
:
config
=
learning_rate_config
.
exponential_decay_learning_rate
...
...
@@ -115,5 +119,4 @@ def _create_learning_rate(learning_rate_config, global_summaries):
if
learning_rate
is
None
:
raise
ValueError
(
'Learning_rate %s not supported.'
%
learning_rate_type
)
global_summaries
.
add
(
tf
.
summary
.
scalar
(
'Learning_Rate'
,
learning_rate
))
return
learning_rate
research/object_detection/builders/optimizer_builder_test.py
View file @
fd7b6887
...
...
@@ -31,12 +31,13 @@ class LearningRateBuilderTest(tf.test.TestCase):
learning_rate: 0.004
}
"""
global_summaries
=
set
([])
learning_rate_proto
=
optimizer_pb2
.
LearningRate
()
text_format
.
Merge
(
learning_rate_text_proto
,
learning_rate_proto
)
learning_rate
=
optimizer_builder
.
_create_learning_rate
(
learning_rate_proto
,
global_summaries
)
self
.
assertAlmostEqual
(
learning_rate
,
0.004
)
learning_rate_proto
)
with
self
.
test_session
():
learning_rate_out
=
learning_rate
.
eval
()
self
.
assertAlmostEqual
(
learning_rate_out
,
0.004
)
def
testBuildExponentialDecayLearningRate
(
self
):
learning_rate_text_proto
=
"""
...
...
@@ -47,11 +48,10 @@ class LearningRateBuilderTest(tf.test.TestCase):
staircase: false
}
"""
global_summaries
=
set
([])
learning_rate_proto
=
optimizer_pb2
.
LearningRate
()
text_format
.
Merge
(
learning_rate_text_proto
,
learning_rate_proto
)
learning_rate
=
optimizer_builder
.
_create_learning_rate
(
learning_rate_proto
,
global_summaries
)
learning_rate_proto
)
self
.
assertTrue
(
isinstance
(
learning_rate
,
tf
.
Tensor
))
def
testBuildManualStepLearningRate
(
self
):
...
...
@@ -67,11 +67,10 @@ class LearningRateBuilderTest(tf.test.TestCase):
}
}
"""
global_summaries
=
set
([])
learning_rate_proto
=
optimizer_pb2
.
LearningRate
()
text_format
.
Merge
(
learning_rate_text_proto
,
learning_rate_proto
)
learning_rate
=
optimizer_builder
.
_create_learning_rate
(
learning_rate_proto
,
global_summaries
)
learning_rate_proto
)
self
.
assertTrue
(
isinstance
(
learning_rate
,
tf
.
Tensor
))
def
testBuildCosineDecayLearningRate
(
self
):
...
...
@@ -83,22 +82,19 @@ class LearningRateBuilderTest(tf.test.TestCase):
warmup_steps: 1000
}
"""
global_summaries
=
set
([])
learning_rate_proto
=
optimizer_pb2
.
LearningRate
()
text_format
.
Merge
(
learning_rate_text_proto
,
learning_rate_proto
)
learning_rate
=
optimizer_builder
.
_create_learning_rate
(
learning_rate_proto
,
global_summaries
)
learning_rate_proto
)
self
.
assertTrue
(
isinstance
(
learning_rate
,
tf
.
Tensor
))
def
testRaiseErrorOnEmptyLearningRate
(
self
):
learning_rate_text_proto
=
"""
"""
global_summaries
=
set
([])
learning_rate_proto
=
optimizer_pb2
.
LearningRate
()
text_format
.
Merge
(
learning_rate_text_proto
,
learning_rate_proto
)
with
self
.
assertRaises
(
ValueError
):
optimizer_builder
.
_create_learning_rate
(
learning_rate_proto
,
global_summaries
)
optimizer_builder
.
_create_learning_rate
(
learning_rate_proto
)
class
OptimizerBuilderTest
(
tf
.
test
.
TestCase
):
...
...
@@ -119,10 +115,9 @@ class OptimizerBuilderTest(tf.test.TestCase):
}
use_moving_average: false
"""
global_summaries
=
set
([])
optimizer_proto
=
optimizer_pb2
.
Optimizer
()
text_format
.
Merge
(
optimizer_text_proto
,
optimizer_proto
)
optimizer
=
optimizer_builder
.
build
(
optimizer_proto
,
global_summaries
)
optimizer
,
_
=
optimizer_builder
.
build
(
optimizer_proto
)
self
.
assertTrue
(
isinstance
(
optimizer
,
tf
.
train
.
RMSPropOptimizer
))
def
testBuildMomentumOptimizer
(
self
):
...
...
@@ -137,10 +132,9 @@ class OptimizerBuilderTest(tf.test.TestCase):
}
use_moving_average: false
"""
global_summaries
=
set
([])
optimizer_proto
=
optimizer_pb2
.
Optimizer
()
text_format
.
Merge
(
optimizer_text_proto
,
optimizer_proto
)
optimizer
=
optimizer_builder
.
build
(
optimizer_proto
,
global_summaries
)
optimizer
,
_
=
optimizer_builder
.
build
(
optimizer_proto
)
self
.
assertTrue
(
isinstance
(
optimizer
,
tf
.
train
.
MomentumOptimizer
))
def
testBuildAdamOptimizer
(
self
):
...
...
@@ -154,10 +148,9 @@ class OptimizerBuilderTest(tf.test.TestCase):
}
use_moving_average: false
"""
global_summaries
=
set
([])
optimizer_proto
=
optimizer_pb2
.
Optimizer
()
text_format
.
Merge
(
optimizer_text_proto
,
optimizer_proto
)
optimizer
=
optimizer_builder
.
build
(
optimizer_proto
,
global_summaries
)
optimizer
,
_
=
optimizer_builder
.
build
(
optimizer_proto
)
self
.
assertTrue
(
isinstance
(
optimizer
,
tf
.
train
.
AdamOptimizer
))
def
testBuildMovingAverageOptimizer
(
self
):
...
...
@@ -171,10 +164,9 @@ class OptimizerBuilderTest(tf.test.TestCase):
}
use_moving_average: True
"""
global_summaries
=
set
([])
optimizer_proto
=
optimizer_pb2
.
Optimizer
()
text_format
.
Merge
(
optimizer_text_proto
,
optimizer_proto
)
optimizer
=
optimizer_builder
.
build
(
optimizer_proto
,
global_summaries
)
optimizer
,
_
=
optimizer_builder
.
build
(
optimizer_proto
)
self
.
assertTrue
(
isinstance
(
optimizer
,
tf
.
contrib
.
opt
.
MovingAverageOptimizer
))
...
...
@@ -190,23 +182,21 @@ class OptimizerBuilderTest(tf.test.TestCase):
use_moving_average: True
moving_average_decay: 0.2
"""
global_summaries
=
set
([])
optimizer_proto
=
optimizer_pb2
.
Optimizer
()
text_format
.
Merge
(
optimizer_text_proto
,
optimizer_proto
)
optimizer
=
optimizer_builder
.
build
(
optimizer_proto
,
global_summaries
)
optimizer
,
_
=
optimizer_builder
.
build
(
optimizer_proto
)
self
.
assertTrue
(
isinstance
(
optimizer
,
tf
.
contrib
.
opt
.
MovingAverageOptimizer
))
# TODO
(rathodv)
: Find a way to not depend on the private members.
# TODO: Find a way to not depend on the private members.
self
.
assertAlmostEqual
(
optimizer
.
_ema
.
_decay
,
0.2
)
def
testBuildEmptyOptimizer
(
self
):
optimizer_text_proto
=
"""
"""
global_summaries
=
set
([])
optimizer_proto
=
optimizer_pb2
.
Optimizer
()
text_format
.
Merge
(
optimizer_text_proto
,
optimizer_proto
)
with
self
.
assertRaises
(
ValueError
):
optimizer_builder
.
build
(
optimizer_proto
,
global_summaries
)
optimizer_builder
.
build
(
optimizer_proto
)
if
__name__
==
'__main__'
:
...
...
research/object_detection/builders/preprocessor_builder.py
View file @
fd7b6887
...
...
@@ -83,6 +83,7 @@ PREPROCESSING_FUNCTION_MAP = {
'random_jitter_boxes'
:
preprocessor
.
random_jitter_boxes
,
'random_crop_to_aspect_ratio'
:
preprocessor
.
random_crop_to_aspect_ratio
,
'random_black_patches'
:
preprocessor
.
random_black_patches
,
'rgb_to_gray'
:
preprocessor
.
rgb_to_gray
,
'scale_boxes_to_pixel_coordinates'
:
(
preprocessor
.
scale_boxes_to_pixel_coordinates
),
'subtract_channel_mean'
:
preprocessor
.
subtract_channel_mean
,
...
...
research/object_detection/builders/preprocessor_builder_test.py
View file @
fd7b6887
...
...
@@ -379,6 +379,16 @@ class PreprocessorBuilderTest(tf.test.TestCase):
'new_width'
:
100
,
'method'
:
tf
.
image
.
ResizeMethod
.
BICUBIC
})
def
test_build_rgb_to_gray
(
self
):
preprocessor_text_proto
=
"""
rgb_to_gray {}
"""
preprocessor_proto
=
preprocessor_pb2
.
PreprocessingStep
()
text_format
.
Merge
(
preprocessor_text_proto
,
preprocessor_proto
)
function
,
args
=
preprocessor_builder
.
build
(
preprocessor_proto
)
self
.
assertEqual
(
function
,
preprocessor
.
rgb_to_gray
)
self
.
assertEqual
(
args
,
{})
def
test_build_subtract_channel_mean
(
self
):
preprocessor_text_proto
=
"""
subtract_channel_mean {
...
...
research/object_detection/core/BUILD
View file @
fd7b6887
...
...
@@ -53,7 +53,7 @@ py_library(
deps
=
[
":box_list"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:shape_utils"
,
"//tensorflow
/
models/
research/
object_detection/utils:shape_utils"
,
],
)
...
...
@@ -113,7 +113,7 @@ py_library(
":box_list"
,
":box_list_ops"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:ops"
,
"//tensorflow
/
models/
research/
object_detection/utils:ops"
,
],
)
...
...
@@ -123,6 +123,7 @@ py_library(
"matcher.py"
,
],
deps
=
[
"//tensorflow/models/research/object_detection/utils:ops"
,
],
)
...
...
@@ -160,8 +161,17 @@ py_library(
":box_list"
,
":box_list_ops"
,
":keypoint_ops"
,
":preprocessor_cache"
,
":standard_fields"
,
"//tensorflow"
,
"//tensorflow/models/research/object_detection/utils:shape_utils"
,
],
)
py_library
(
name
=
"preprocessor_cache"
,
srcs
=
[
"preprocessor_cache.py"
,
],
)
...
...
@@ -172,6 +182,7 @@ py_test(
],
deps
=
[
":preprocessor"
,
":preprocessor_cache"
,
"//tensorflow"
,
],
)
...
...
@@ -211,6 +222,7 @@ py_library(
":box_list_ops"
,
":standard_fields"
,
"//tensorflow"
,
"//tensorflow/models/research/object_detection/utils:shape_utils"
,
],
)
...
...
@@ -232,15 +244,16 @@ py_library(
],
deps
=
[
":box_list"
,
":box_list_ops"
,
":matcher"
,
":region_similarity_calculator"
,
":standard_fields"
,
"//tensorflow"
,
"//tensorflow_models/object_detection/box_coders:faster_rcnn_box_coder"
,
"//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder"
,
"//tensorflow_models/object_detection/core:box_coder"
,
"//tensorflow_models/object_detection/matchers:argmax_matcher"
,
"//tensorflow_models/object_detection/matchers:bipartite_matcher"
,
"//tensorflow/models/research/object_detection/box_coders:faster_rcnn_box_coder"
,
"//tensorflow/models/research/object_detection/box_coders:mean_stddev_box_coder"
,
"//tensorflow/models/research/object_detection/core:box_coder"
,
"//tensorflow/models/research/object_detection/matchers:argmax_matcher"
,
"//tensorflow/models/research/object_detection/matchers:bipartite_matcher"
,
"//tensorflow/models/research/object_detection/utils:shape_utils"
,
],
)
...
...
@@ -254,8 +267,10 @@ py_test(
":region_similarity_calculator"
,
":target_assigner"
,
"//tensorflow"
,
"//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder"
,
"//tensorflow_models/object_detection/matchers:bipartite_matcher"
,
"//tensorflow/models/research/object_detection/box_coders:keypoint_box_coder"
,
"//tensorflow/models/research/object_detection/box_coders:mean_stddev_box_coder"
,
"//tensorflow/models/research/object_detection/matchers:bipartite_matcher"
,
"//tensorflow/models/research/object_detection/utils:test_case"
,
],
)
...
...
@@ -274,9 +289,9 @@ py_library(
srcs
=
[
"box_predictor.py"
],
deps
=
[
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:ops"
,
"//tensorflow
_
models/object_detection/utils:shape_utils"
,
"//tensorflow
_
models/object_detection/utils:static_shape"
,
"//tensorflow
/
models/
research/
object_detection/utils:ops"
,
"//tensorflow
/
models/
research/
object_detection/utils:shape_utils"
,
"//tensorflow
/
models/
research/
object_detection/utils:static_shape"
,
],
)
...
...
@@ -286,8 +301,9 @@ py_test(
deps
=
[
":box_predictor"
,
"//tensorflow"
,
"//tensorflow_models/object_detection/builders:hyperparams_builder"
,
"//tensorflow_models/object_detection/protos:hyperparams_py_pb2"
,
"//tensorflow/models/research/object_detection/builders:hyperparams_builder"
,
"//tensorflow/models/research/object_detection/protos:hyperparams_py_pb2"
,
"//tensorflow/models/research/object_detection/utils:test_case"
,
],
)
...
...
@@ -298,7 +314,7 @@ py_library(
],
deps
=
[
"//tensorflow"
,
"//tensorflow
_
models/object_detection/core:box_list_ops"
,
"//tensorflow
/
models/
research/
object_detection/core:box_list_ops"
,
],
)
...
...
@@ -309,7 +325,7 @@ py_test(
],
deps
=
[
":region_similarity_calculator"
,
"//tensorflow
_
models/object_detection/core:box_list"
,
"//tensorflow
/
models/
research/
object_detection/core:box_list"
,
],
)
...
...
@@ -330,7 +346,7 @@ py_library(
],
deps
=
[
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:ops"
,
"//tensorflow
/
models/
research/
object_detection/utils:ops"
,
],
)
...
...
research/object_detection/core/__init__.py
View file @
fd7b6887
research/object_detection/core/anchor_generator.py
View file @
fd7b6887
...
...
@@ -77,8 +77,8 @@ class AnchorGenerator(object):
def
generate
(
self
,
feature_map_shape_list
,
**
params
):
"""Generates a collection of bounding boxes to be used as anchors.
TODO: remove **params from argument list and make stride and
offsets (for
multiple_grid_anchor_generator) constructor arguments.
TODO: remove **params from argument list and make stride and
offsets (for
multiple_grid_anchor_generator) constructor arguments.
Args:
feature_map_shape_list: list of (height, width) pairs in the format
...
...
@@ -140,3 +140,4 @@ class AnchorGenerator(object):
*
feature_map_shape
[
0
]
*
feature_map_shape
[
1
])
return
tf
.
assert_equal
(
expected_num_anchors
,
anchors
.
num_boxes
())
research/object_detection/core/box_list_ops.py
View file @
fd7b6887
...
...
@@ -183,7 +183,8 @@ def prune_completely_outside_window(boxlist, window, scope=None):
scope: name scope.
Returns:
pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
pruned_boxlist: a new BoxList with all bounding boxes partially or fully in
the window.
valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
in the input tensor.
"""
...
...
@@ -982,3 +983,79 @@ def pad_or_clip_box_list(boxlist, num_boxes, scope=None):
boxlist
.
get_field
(
field
),
num_boxes
)
subboxlist
.
add_field
(
field
,
subfield
)
return
subboxlist
def
select_random_box
(
boxlist
,
default_box
=
None
,
seed
=
None
,
scope
=
None
):
"""Selects a random bounding box from a `BoxList`.
Args:
boxlist: A BoxList.
default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`,
this default box will be returned. If None, will use a default box of
[[-1., -1., -1., -1.]].
seed: Random seed.
scope: Name scope.
Returns:
bbox: A [1, 4] tensor with a random bounding box.
valid: A bool tensor indicating whether a valid bounding box is returned
(True) or whether the default box is returned (False).
"""
with
tf
.
name_scope
(
scope
,
'SelectRandomBox'
):
bboxes
=
boxlist
.
get
()
combined_shape
=
shape_utils
.
combined_static_and_dynamic_shape
(
bboxes
)
number_of_boxes
=
combined_shape
[
0
]
default_box
=
default_box
or
tf
.
constant
([[
-
1.
,
-
1.
,
-
1.
,
-
1.
]])
def
select_box
():
random_index
=
tf
.
random_uniform
([],
maxval
=
number_of_boxes
,
dtype
=
tf
.
int32
,
seed
=
seed
)
return
tf
.
expand_dims
(
bboxes
[
random_index
],
axis
=
0
),
tf
.
constant
(
True
)
return
tf
.
cond
(
tf
.
greater_equal
(
number_of_boxes
,
1
),
true_fn
=
select_box
,
false_fn
=
lambda
:
(
default_box
,
tf
.
constant
(
False
)))
def
get_minimal_coverage_box
(
boxlist
,
default_box
=
None
,
scope
=
None
):
"""Creates a single bounding box which covers all boxes in the boxlist.
Args:
boxlist: A Boxlist.
default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`,
this default box will be returned. If None, will use a default box of
[[0., 0., 1., 1.]].
scope: Name scope.
Returns:
A [1, 4] float32 tensor with a bounding box that tightly covers all the
boxes in the box list. If the boxlist does not contain any boxes, the
default box is returned.
"""
with
tf
.
name_scope
(
scope
,
'CreateCoverageBox'
):
num_boxes
=
boxlist
.
num_boxes
()
def
coverage_box
(
bboxes
):
y_min
,
x_min
,
y_max
,
x_max
=
tf
.
split
(
value
=
bboxes
,
num_or_size_splits
=
4
,
axis
=
1
)
y_min_coverage
=
tf
.
reduce_min
(
y_min
,
axis
=
0
)
x_min_coverage
=
tf
.
reduce_min
(
x_min
,
axis
=
0
)
y_max_coverage
=
tf
.
reduce_max
(
y_max
,
axis
=
0
)
x_max_coverage
=
tf
.
reduce_max
(
x_max
,
axis
=
0
)
return
tf
.
stack
(
[
y_min_coverage
,
x_min_coverage
,
y_max_coverage
,
x_max_coverage
],
axis
=
1
)
default_box
=
default_box
or
tf
.
constant
([[
0.
,
0.
,
1.
,
1.
]])
return
tf
.
cond
(
tf
.
greater_equal
(
num_boxes
,
1
),
true_fn
=
lambda
:
coverage_box
(
boxlist
.
get
()),
false_fn
=
lambda
:
default_box
)
research/object_detection/core/box_list_ops_test.py
View file @
fd7b6887
...
...
@@ -153,6 +153,25 @@ class BoxListOpsTest(tf.test.TestCase):
extra_data_out
=
sess
.
run
(
pruned
.
get_field
(
'extra_data'
))
self
.
assertAllEqual
(
extra_data_out
,
[[
1
],
[
2
],
[
3
],
[
4
],
[
6
]])
def
test_prune_completely_outside_window_with_empty_boxlist
(
self
):
window
=
tf
.
constant
([
0
,
0
,
9
,
14
],
tf
.
float32
)
corners
=
tf
.
zeros
(
shape
=
[
0
,
4
],
dtype
=
tf
.
float32
)
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'extra_data'
,
tf
.
zeros
(
shape
=
[
0
],
dtype
=
tf
.
int32
))
pruned
,
keep_indices
=
box_list_ops
.
prune_completely_outside_window
(
boxes
,
window
)
pruned_boxes
=
pruned
.
get
()
extra
=
pruned
.
get_field
(
'extra_data'
)
exp_pruned_boxes
=
np
.
zeros
(
shape
=
[
0
,
4
],
dtype
=
np
.
float32
)
exp_extra
=
np
.
zeros
(
shape
=
[
0
],
dtype
=
np
.
int32
)
with
self
.
test_session
()
as
sess
:
pruned_boxes_out
,
keep_indices_out
,
extra_out
=
sess
.
run
(
[
pruned_boxes
,
keep_indices
,
extra
])
self
.
assertAllClose
(
exp_pruned_boxes
,
pruned_boxes_out
)
self
.
assertAllEqual
([],
keep_indices_out
)
self
.
assertAllEqual
(
exp_extra
,
extra_out
)
def
test_intersection
(
self
):
corners1
=
tf
.
constant
([[
4.0
,
3.0
,
7.0
,
5.0
],
[
5.0
,
6.0
,
10.0
,
7.0
]])
corners2
=
tf
.
constant
([[
3.0
,
4.0
,
6.0
,
8.0
],
[
14.0
,
14.0
,
15.0
,
15.0
],
...
...
@@ -593,6 +612,58 @@ class BoxListOpsTest(tf.test.TestCase):
self
.
assertAllEqual
(
expected_classes
,
classes_out
)
self
.
assertAllClose
(
expected_scores
,
scores_out
)
def
test_select_random_box
(
self
):
boxes
=
[[
0.
,
0.
,
1.
,
1.
],
[
0.
,
1.
,
2.
,
3.
],
[
0.
,
2.
,
3.
,
4.
]]
corners
=
tf
.
constant
(
boxes
,
dtype
=
tf
.
float32
)
boxlist
=
box_list
.
BoxList
(
corners
)
random_bbox
,
valid
=
box_list_ops
.
select_random_box
(
boxlist
)
with
self
.
test_session
()
as
sess
:
random_bbox_out
,
valid_out
=
sess
.
run
([
random_bbox
,
valid
])
norm_small
=
any
(
[
np
.
linalg
.
norm
(
random_bbox_out
-
box
)
<
1e-6
for
box
in
boxes
])
self
.
assertTrue
(
norm_small
)
self
.
assertTrue
(
valid_out
)
def
test_select_random_box_with_empty_boxlist
(
self
):
corners
=
tf
.
constant
([],
shape
=
[
0
,
4
],
dtype
=
tf
.
float32
)
boxlist
=
box_list
.
BoxList
(
corners
)
random_bbox
,
valid
=
box_list_ops
.
select_random_box
(
boxlist
)
with
self
.
test_session
()
as
sess
:
random_bbox_out
,
valid_out
=
sess
.
run
([
random_bbox
,
valid
])
expected_bbox_out
=
np
.
array
([[
-
1.
,
-
1.
,
-
1.
,
-
1.
]],
dtype
=
np
.
float32
)
self
.
assertAllEqual
(
expected_bbox_out
,
random_bbox_out
)
self
.
assertFalse
(
valid_out
)
def
test_get_minimal_coverage_box
(
self
):
boxes
=
[[
0.
,
0.
,
1.
,
1.
],
[
-
1.
,
1.
,
2.
,
3.
],
[
0.
,
2.
,
3.
,
4.
]]
expected_coverage_box
=
[[
-
1.
,
0.
,
3.
,
4.
]]
corners
=
tf
.
constant
(
boxes
,
dtype
=
tf
.
float32
)
boxlist
=
box_list
.
BoxList
(
corners
)
coverage_box
=
box_list_ops
.
get_minimal_coverage_box
(
boxlist
)
with
self
.
test_session
()
as
sess
:
coverage_box_out
=
sess
.
run
(
coverage_box
)
self
.
assertAllClose
(
expected_coverage_box
,
coverage_box_out
)
def
test_get_minimal_coverage_box_with_empty_boxlist
(
self
):
corners
=
tf
.
constant
([],
shape
=
[
0
,
4
],
dtype
=
tf
.
float32
)
boxlist
=
box_list
.
BoxList
(
corners
)
coverage_box
=
box_list_ops
.
get_minimal_coverage_box
(
boxlist
)
with
self
.
test_session
()
as
sess
:
coverage_box_out
=
sess
.
run
(
coverage_box
)
self
.
assertAllClose
([[
0.0
,
0.0
,
1.0
,
1.0
]],
coverage_box_out
)
class
ConcatenateTest
(
tf
.
test
.
TestCase
):
...
...
@@ -958,5 +1029,6 @@ class BoxRefinementTest(tf.test.TestCase):
self
.
assertAllClose
(
expected_scores
,
scores_out
)
self
.
assertAllEqual
(
extra_field_out
,
[
0
,
1
,
1
])
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
research/object_detection/core/box_predictor.py
View file @
fd7b6887
...
...
@@ -27,6 +27,7 @@ These modules are separated from the main model since the same
few box predictor architectures are shared across many models.
"""
from
abc
import
abstractmethod
import
math
import
tensorflow
as
tf
from
object_detection.utils
import
ops
from
object_detection.utils
import
shape_utils
...
...
@@ -59,8 +60,8 @@ class BoxPredictor(object):
def
num_classes
(
self
):
return
self
.
_num_classes
def
predict
(
self
,
image_features
,
num_predictions_per_location
,
scope
,
**
params
):
def
predict
(
self
,
image_features
,
num_predictions_per_location
,
scope
=
None
,
**
params
):
"""Computes encoded object locations and corresponding confidences.
Takes a high level image feature map as input and produce two predictions,
...
...
@@ -70,10 +71,10 @@ class BoxPredictor(object):
and do not assume anything about their shapes.
Args:
image_features: A float tensor of shape [batch_size, height
, width
,
channels] containing features for a batch of images.
num_predictions_per_location:
an
integer representing the number
of box
predictions to be made per spatial location
in the
feature map.
image_features: A
list of
float tensor
s
of shape [batch_size, height
_i
,
width_i,
channels
_i
] containing features for a batch of images.
num_predictions_per_location:
A list of
integer
s
representing the number
of box
predictions to be made per spatial location
for each
feature map.
scope: Variable and Op scope name.
**params: Additional keyword arguments for specific implementations of
BoxPredictor.
...
...
@@ -86,10 +87,22 @@ class BoxPredictor(object):
class_predictions_with_background: A float tensor of shape
[batch_size, num_anchors, num_classes + 1] representing the class
predictions for the proposals.
Raises:
ValueError: If length of `image_features` is not equal to length of
`num_predictions_per_location`.
"""
with
tf
.
variable_scope
(
scope
):
return
self
.
_predict
(
image_features
,
num_predictions_per_location
,
**
params
)
if
len
(
image_features
)
!=
len
(
num_predictions_per_location
):
raise
ValueError
(
'image_feature and num_predictions_per_location must '
'be of same length, found: {} vs {}'
.
format
(
len
(
image_features
),
len
(
num_predictions_per_location
)))
if
scope
is
not
None
:
with
tf
.
variable_scope
(
scope
):
return
self
.
_predict
(
image_features
,
num_predictions_per_location
,
**
params
)
return
self
.
_predict
(
image_features
,
num_predictions_per_location
,
**
params
)
# TODO: num_predictions_per_location could be moved to constructor.
# This is currently only used by ConvolutionalBoxPredictor.
...
...
@@ -98,10 +111,10 @@ class BoxPredictor(object):
"""Implementations must override this method.
Args:
image_features: A float tensor of shape [batch_size, height
, width
,
channels] containing features for a batch of images.
num_predictions_per_location:
an
integer representing the number
of box
predictions to be made per spatial location
in the
feature map.
image_features: A
list of
float tensor
s
of shape [batch_size, height
_i
,
width_i,
channels
_i
] containing features for a batch of images.
num_predictions_per_location:
A list of
integer
s
representing the number
of box
predictions to be made per spatial location
for each
feature map.
**params: Additional keyword arguments for specific implementations of
BoxPredictor.
...
...
@@ -169,28 +182,35 @@ class RfcnBoxPredictor(BoxPredictor):
"""Computes encoded object locations and corresponding confidences.
Args:
image_features: A float tensor of shape [batch_size, height
, width
,
channels] containing features for a batch of images.
num_predictions_per_location:
an
integer representing the number
of box
predictions to be made per spatial location
in the
feature map.
Currently, this must be set to
1
, or an error will be raised.
image_features: A
list of
float tensor
s
of shape [batch_size, height
_i
,
width_i,
channels
_i
] containing features for a batch of images.
num_predictions_per_location:
A list of
integer
s
representing the number
of box
predictions to be made per spatial location
for each
feature map.
Currently, this must be set to
[1]
, or an error will be raised.
proposal_boxes: A float tensor of shape [batch_size, num_proposals,
box_code_size].
Returns:
box_encodings: A float tensor of shape
[batch_size,
1
, num_classes, code_size] representing the
[batch_size,
num_anchors
, num_classes, code_size] representing the
location of the objects.
class_predictions_with_background: A float tensor of shape
[batch_size,
1
, num_classes + 1] representing the class
[batch_size,
num_anchors
, num_classes + 1] representing the class
predictions for the proposals.
Raises:
ValueError: if num_predictions_per_location is not 1.
ValueError: if num_predictions_per_location is not 1 or if
len(image_features) is not 1.
"""
if
num_predictions_per_location
!=
1
:
if
(
len
(
num_predictions_per_location
)
!=
1
or
num_predictions_per_location
[
0
]
!=
1
):
raise
ValueError
(
'Currently RfcnBoxPredictor only supports '
'predicting a single box per class per location.'
)
if
len
(
image_features
)
!=
1
:
raise
ValueError
(
'length of `image_features` must be 1. Found {}'
.
format
(
len
(
image_features
)))
image_feature
=
image_features
[
0
]
num_predictions_per_location
=
num_predictions_per_location
[
0
]
batch_size
=
tf
.
shape
(
proposal_boxes
)[
0
]
num_boxes
=
tf
.
shape
(
proposal_boxes
)[
1
]
def
get_box_indices
(
proposals
):
...
...
@@ -202,7 +222,7 @@ class RfcnBoxPredictor(BoxPredictor):
tf
.
range
(
start
=
0
,
limit
=
proposals_shape
[
0
]),
1
)
return
tf
.
reshape
(
ones_mat
*
multiplier
,
[
-
1
])
net
=
image_feature
s
net
=
image_feature
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
net
=
slim
.
conv2d
(
net
,
self
.
_depth
,
[
1
,
1
],
scope
=
'reduce_depth'
)
# Location predictions.
...
...
@@ -280,6 +300,7 @@ class MaskRCNNBoxPredictor(BoxPredictor):
predict_instance_masks
=
False
,
mask_height
=
14
,
mask_width
=
14
,
mask_prediction_num_conv_layers
=
2
,
mask_prediction_conv_depth
=
256
,
predict_keypoints
=
False
):
"""Constructor.
...
...
@@ -304,13 +325,21 @@ class MaskRCNNBoxPredictor(BoxPredictor):
boxes.
mask_height: Desired output mask height. The default value is 14.
mask_width: Desired output mask width. The default value is 14.
mask_prediction_num_conv_layers: Number of convolution layers applied to
the image_features in mask prediction branch.
mask_prediction_conv_depth: The depth for the first conv2d_transpose op
applied to the image_features in the mask prediciton branch.
applied to the image_features in the mask prediction branch. If set
to 0, the depth of the convolution layers will be automatically chosen
based on the number of object classes and the number of channels in the
image features.
predict_keypoints: Whether to predict keypoints insde detection boxes.
Raises:
ValueError: If predict_instance_masks or predict_keypoints is true.
ValueError: If predict_instance_masks is true but conv_hyperparams is not
set.
ValueError: If predict_keypoints is true since it is not implemented yet.
ValueError: If mask_prediction_num_conv_layers is smaller than two.
"""
super
(
MaskRCNNBoxPredictor
,
self
).
__init__
(
is_training
,
num_classes
)
self
.
_fc_hyperparams
=
fc_hyperparams
...
...
@@ -321,6 +350,7 @@ class MaskRCNNBoxPredictor(BoxPredictor):
self
.
_predict_instance_masks
=
predict_instance_masks
self
.
_mask_height
=
mask_height
self
.
_mask_width
=
mask_width
self
.
_mask_prediction_num_conv_layers
=
mask_prediction_num_conv_layers
self
.
_mask_prediction_conv_depth
=
mask_prediction_conv_depth
self
.
_predict_keypoints
=
predict_keypoints
if
self
.
_predict_keypoints
:
...
...
@@ -329,52 +359,33 @@ class MaskRCNNBoxPredictor(BoxPredictor):
self
.
_conv_hyperparams
is
None
):
raise
ValueError
(
'`conv_hyperparams` must be provided when predicting '
'masks.'
)
if
self
.
_mask_prediction_num_conv_layers
<
2
:
raise
ValueError
(
'Mask prediction should consist of at least 2 conv layers'
)
@
property
def
num_classes
(
self
):
return
self
.
_num_classes
def
_predict
(
self
,
image_features
,
num_predictions_per_location
):
"""Computes encoded object locations and corresponding confidences.
Flattens image_features and applies fully connected ops (with no
non-linearity) to predict box encodings and class predictions. In this
setting, anchors are not spatially arranged in any way and are assumed to
have been folded into the batch dimension. Thus we output 1 for the
anchors dimension.
@
property
def
predicts_instance_masks
(
self
):
return
self
.
_predict_instance_masks
Also optionally predicts instance masks.
The mask prediction head is based on the Mask RCNN paper with the following
modifications: We replace the deconvolution layer with a bilinear resize
and a convolution.
def
_predict_boxes_and_classes
(
self
,
image_features
):
"""Predicts boxes and class scores.
Args:
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
num_predictions_per_location: an integer representing the number of box
predictions to be made per spatial location in the feature map.
Currently, this must be set to 1, or an error will be raised.
Returns:
A dictionary containing the following tensors.
box_encodings: A float tensor of shape
[batch_size, 1, num_classes, code_size] representing the
location of the objects.
class_predictions_with_background: A float tensor of shape
[batch_size, 1, num_classes + 1] representing the class
predictions for the proposals.
If predict_masks is True the dictionary also contains:
instance_masks: A float tensor of shape
[batch_size, 1, num_classes, image_height, image_width]
If predict_keypoints is True the dictionary also contains:
keypoints: [batch_size, 1, num_keypoints, 2]
Raises:
ValueError: if num_predictions_per_location is not 1.
box_encodings: A float tensor of shape
[batch_size, 1, num_classes, code_size] representing the location of the
objects.
class_predictions_with_background: A float tensor of shape
[batch_size, 1, num_classes + 1] representing the class predictions for
the proposals.
"""
if
num_predictions_per_location
!=
1
:
raise
ValueError
(
'Currently FullyConnectedBoxPredictor only supports '
'predicting a single box per class per location.'
)
spatial_averaged_image_features
=
tf
.
reduce_mean
(
image_features
,
[
1
,
2
],
keep_dims
=
True
,
name
=
'AvgPool'
)
...
...
@@ -398,34 +409,155 @@ class MaskRCNNBoxPredictor(BoxPredictor):
box_encodings
,
[
-
1
,
1
,
self
.
_num_classes
,
self
.
_box_code_size
])
class_predictions_with_background
=
tf
.
reshape
(
class_predictions_with_background
,
[
-
1
,
1
,
self
.
_num_classes
+
1
])
return
box_encodings
,
class_predictions_with_background
def
_get_mask_predictor_conv_depth
(
self
,
num_feature_channels
,
num_classes
,
class_weight
=
3.0
,
feature_weight
=
2.0
):
"""Computes the depth of the mask predictor convolutions.
Computes the depth of the mask predictor convolutions given feature channels
and number of classes by performing a weighted average of the two in
log space to compute the number of convolution channels. The weights that
are used for computing the weighted average do not need to sum to 1.
Args:
num_feature_channels: An integer containing the number of feature
channels.
num_classes: An integer containing the number of classes.
class_weight: Class weight used in computing the weighted average.
feature_weight: Feature weight used in computing the weighted average.
predictions_dict
=
{
BOX_ENCODINGS
:
box_encodings
,
CLASS_PREDICTIONS_WITH_BACKGROUND
:
class_predictions_with_background
}
if
self
.
_predict_instance_masks
:
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
upsampled_features
=
tf
.
image
.
resize_bilinear
(
image_features
,
[
self
.
_mask_height
,
self
.
_mask_width
],
align_corners
=
True
)
Returns:
An integer containing the number of convolution channels used by mask
predictor.
"""
num_feature_channels_log
=
math
.
log
(
float
(
num_feature_channels
),
2.0
)
num_classes_log
=
math
.
log
(
float
(
num_classes
),
2.0
)
weighted_num_feature_channels_log
=
(
num_feature_channels_log
*
feature_weight
)
weighted_num_classes_log
=
num_classes_log
*
class_weight
total_weight
=
feature_weight
+
class_weight
num_conv_channels_log
=
round
(
(
weighted_num_feature_channels_log
+
weighted_num_classes_log
)
/
total_weight
)
return
int
(
math
.
pow
(
2.0
,
num_conv_channels_log
))
def
_predict_masks
(
self
,
image_features
):
"""Performs mask prediction.
Args:
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
Returns:
instance_masks: A float tensor of shape
[batch_size, 1, num_classes, image_height, image_width].
"""
num_conv_channels
=
self
.
_mask_prediction_conv_depth
if
num_conv_channels
==
0
:
num_feature_channels
=
image_features
.
get_shape
().
as_list
()[
3
]
num_conv_channels
=
self
.
_get_mask_predictor_conv_depth
(
num_feature_channels
,
self
.
num_classes
)
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
upsampled_features
=
tf
.
image
.
resize_bilinear
(
image_features
,
[
self
.
_mask_height
,
self
.
_mask_width
],
align_corners
=
True
)
for
_
in
range
(
self
.
_mask_prediction_num_conv_layers
-
1
):
upsampled_features
=
slim
.
conv2d
(
upsampled_features
,
num_outputs
=
self
.
_mask_prediction_conv_depth
,
kernel_size
=
[
2
,
2
])
mask_predictions
=
slim
.
conv2d
(
upsampled_features
,
num_outputs
=
self
.
num_classes
,
activation_fn
=
None
,
kernel_size
=
[
3
,
3
])
instance_masks
=
tf
.
expand_dims
(
tf
.
transpose
(
mask_predictions
,
perm
=
[
0
,
3
,
1
,
2
]),
axis
=
1
,
name
=
'MaskPredictor'
)
predictions_dict
[
MASK_PREDICTIONS
]
=
instance_masks
num_outputs
=
num_conv_channels
,
kernel_size
=
[
3
,
3
])
mask_predictions
=
slim
.
conv2d
(
upsampled_features
,
num_outputs
=
self
.
num_classes
,
activation_fn
=
None
,
kernel_size
=
[
3
,
3
])
return
tf
.
expand_dims
(
tf
.
transpose
(
mask_predictions
,
perm
=
[
0
,
3
,
1
,
2
]),
axis
=
1
,
name
=
'MaskPredictor'
)
def
_predict
(
self
,
image_features
,
num_predictions_per_location
,
predict_boxes_and_classes
=
True
,
predict_auxiliary_outputs
=
False
):
"""Optionally computes encoded object locations, confidences, and masks.
Flattens image_features and applies fully connected ops (with no
non-linearity) to predict box encodings and class predictions. In this
setting, anchors are not spatially arranged in any way and are assumed to
have been folded into the batch dimension. Thus we output 1 for the
anchors dimension.
Also optionally predicts instance masks.
The mask prediction head is based on the Mask RCNN paper with the following
modifications: We replace the deconvolution layer with a bilinear resize
and a convolution.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
num_predictions_per_location: A list of integers representing the number
of box predictions to be made per spatial location for each feature map.
Currently, this must be set to [1], or an error will be raised.
predict_boxes_and_classes: If true, the function will perform box
refinement and classification.
predict_auxiliary_outputs: If true, the function will perform other
predictions such as mask, keypoint, boundaries, etc. if any.
Returns:
A dictionary containing the following tensors.
box_encodings: A float tensor of shape
[batch_size, 1, num_classes, code_size] representing the
location of the objects.
class_predictions_with_background: A float tensor of shape
[batch_size, 1, num_classes + 1] representing the class
predictions for the proposals.
If predict_masks is True the dictionary also contains:
instance_masks: A float tensor of shape
[batch_size, 1, num_classes, image_height, image_width]
If predict_keypoints is True the dictionary also contains:
keypoints: [batch_size, 1, num_keypoints, 2]
Raises:
ValueError: If num_predictions_per_location is not 1 or if both
predict_boxes_and_classes and predict_auxiliary_outputs are false or if
len(image_features) is not 1.
"""
if
(
len
(
num_predictions_per_location
)
!=
1
or
num_predictions_per_location
[
0
]
!=
1
):
raise
ValueError
(
'Currently FullyConnectedBoxPredictor only supports '
'predicting a single box per class per location.'
)
if
not
predict_boxes_and_classes
and
not
predict_auxiliary_outputs
:
raise
ValueError
(
'Should perform at least one prediction.'
)
if
len
(
image_features
)
!=
1
:
raise
ValueError
(
'length of `image_features` must be 1. Found {}'
.
format
(
len
(
image_features
)))
image_feature
=
image_features
[
0
]
num_predictions_per_location
=
num_predictions_per_location
[
0
]
predictions_dict
=
{}
if
predict_boxes_and_classes
:
(
box_encodings
,
class_predictions_with_background
)
=
self
.
_predict_boxes_and_classes
(
image_feature
)
predictions_dict
[
BOX_ENCODINGS
]
=
box_encodings
predictions_dict
[
CLASS_PREDICTIONS_WITH_BACKGROUND
]
=
class_predictions_with_background
if
self
.
_predict_instance_masks
and
predict_auxiliary_outputs
:
predictions_dict
[
MASK_PREDICTIONS
]
=
self
.
_predict_masks
(
image_feature
)
return
predictions_dict
class
_NoopVariableScope
(
object
):
"""A dummy class that does not push any scope."""
def
__enter__
(
self
):
return
None
def
__exit__
(
self
,
exc_type
,
exc_value
,
traceback
):
return
False
class
ConvolutionalBoxPredictor
(
BoxPredictor
):
"""Convolutional Box Predictor.
...
...
@@ -450,7 +582,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
kernel_size
,
box_code_size
,
apply_sigmoid_to_scores
=
False
,
class_prediction_bias_init
=
0.0
):
class_prediction_bias_init
=
0.0
,
use_depthwise
=
False
):
"""Constructor.
Args:
...
...
@@ -479,6 +612,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
class_predictions.
class_prediction_bias_init: constant value to initialize bias of the last
conv2d layer before class prediction.
use_depthwise: Whether to use depthwise convolutions for prediction
steps. Default is False.
Raises:
ValueError: if min_depth > max_depth.
...
...
@@ -496,15 +631,17 @@ class ConvolutionalBoxPredictor(BoxPredictor):
self
.
_dropout_keep_prob
=
dropout_keep_prob
self
.
_apply_sigmoid_to_scores
=
apply_sigmoid_to_scores
self
.
_class_prediction_bias_init
=
class_prediction_bias_init
self
.
_use_depthwise
=
use_depthwise
def
_predict
(
self
,
image_features
,
num_predictions_per_location
):
def
_predict
(
self
,
image_features
,
num_predictions_per_location
_list
):
"""Computes encoded object locations and corresponding confidences.
Args:
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
num_predictions_per_location: an integer representing the number of box
predictions to be made per spatial location in the feature map.
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
num_predictions_per_location_list: A list of integers representing the
number of box predictions to be made per spatial location for each
feature map.
Returns:
A dictionary containing the following tensors.
...
...
@@ -514,53 +651,245 @@ class ConvolutionalBoxPredictor(BoxPredictor):
class_predictions_with_background: A float tensor of shape
[batch_size, num_anchors, num_classes + 1] representing the class
predictions for the proposals.
"""
# Add a slot for the background class.
num_class_slots
=
self
.
num_classes
+
1
net
=
image_features
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
),
\
slim
.
arg_scope
([
slim
.
dropout
],
is_training
=
self
.
_is_training
):
# Add additional conv layers before the class predictor.
features_depth
=
static_shape
.
get_depth
(
image_features
.
get_shape
())
depth
=
max
(
min
(
features_depth
,
self
.
_max_depth
),
self
.
_min_depth
)
tf
.
logging
.
info
(
'depth of additional conv before box predictor: {}'
.
format
(
depth
))
if
depth
>
0
and
self
.
_num_layers_before_predictor
>
0
:
for
i
in
range
(
self
.
_num_layers_before_predictor
):
net
=
slim
.
conv2d
(
net
,
depth
,
[
1
,
1
],
scope
=
'Conv2d_%d_1x1_%d'
%
(
i
,
depth
))
with
slim
.
arg_scope
([
slim
.
conv2d
],
activation_fn
=
None
,
normalizer_fn
=
None
,
normalizer_params
=
None
):
box_encodings
=
slim
.
conv2d
(
net
,
num_predictions_per_location
*
self
.
_box_code_size
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
scope
=
'BoxEncodingPredictor'
)
if
self
.
_use_dropout
:
net
=
slim
.
dropout
(
net
,
keep_prob
=
self
.
_dropout_keep_prob
)
class_predictions_with_background
=
slim
.
conv2d
(
net
,
num_predictions_per_location
*
num_class_slots
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
scope
=
'ClassPredictor'
,
biases_initializer
=
tf
.
constant_initializer
(
self
.
_class_prediction_bias_init
))
if
self
.
_apply_sigmoid_to_scores
:
class_predictions_with_background
=
tf
.
sigmoid
(
class_predictions_with_background
)
combined_feature_map_shape
=
shape_utils
.
combined_static_and_dynamic_shape
(
image_features
)
box_encodings
=
tf
.
reshape
(
box_encodings
,
tf
.
stack
([
combined_feature_map_shape
[
0
],
combined_feature_map_shape
[
1
]
*
combined_feature_map_shape
[
2
]
*
num_predictions_per_location
,
1
,
self
.
_box_code_size
]))
class_predictions_with_background
=
tf
.
reshape
(
class_predictions_with_background
,
tf
.
stack
([
combined_feature_map_shape
[
0
],
combined_feature_map_shape
[
1
]
*
combined_feature_map_shape
[
2
]
*
num_predictions_per_location
,
num_class_slots
]))
return
{
BOX_ENCODINGS
:
box_encodings
,
box_encodings_list
=
[]
class_predictions_list
=
[]
# TODO: Come up with a better way to generate scope names
# in box predictor once we have time to retrain all models in the zoo.
# The following lines create scope names to be backwards compatible with the
# existing checkpoints.
box_predictor_scopes
=
[
_NoopVariableScope
()]
if
len
(
image_features
)
>
1
:
box_predictor_scopes
=
[
tf
.
variable_scope
(
'BoxPredictor_{}'
.
format
(
i
))
for
i
in
range
(
len
(
image_features
))
]
for
(
image_feature
,
num_predictions_per_location
,
box_predictor_scope
)
in
zip
(
image_features
,
num_predictions_per_location_list
,
box_predictor_scopes
):
with
box_predictor_scope
:
# Add a slot for the background class.
num_class_slots
=
self
.
num_classes
+
1
net
=
image_feature
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
),
\
slim
.
arg_scope
([
slim
.
dropout
],
is_training
=
self
.
_is_training
):
# Add additional conv layers before the class predictor.
features_depth
=
static_shape
.
get_depth
(
image_feature
.
get_shape
())
depth
=
max
(
min
(
features_depth
,
self
.
_max_depth
),
self
.
_min_depth
)
tf
.
logging
.
info
(
'depth of additional conv before box predictor: {}'
.
format
(
depth
))
if
depth
>
0
and
self
.
_num_layers_before_predictor
>
0
:
for
i
in
range
(
self
.
_num_layers_before_predictor
):
net
=
slim
.
conv2d
(
net
,
depth
,
[
1
,
1
],
scope
=
'Conv2d_%d_1x1_%d'
%
(
i
,
depth
))
with
slim
.
arg_scope
([
slim
.
conv2d
],
activation_fn
=
None
,
normalizer_fn
=
None
,
normalizer_params
=
None
):
if
self
.
_use_depthwise
:
box_encodings
=
slim
.
separable_conv2d
(
net
,
None
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
padding
=
'SAME'
,
depth_multiplier
=
1
,
stride
=
1
,
rate
=
1
,
scope
=
'BoxEncodingPredictor_depthwise'
)
box_encodings
=
slim
.
conv2d
(
box_encodings
,
num_predictions_per_location
*
self
.
_box_code_size
,
[
1
,
1
],
scope
=
'BoxEncodingPredictor'
)
else
:
box_encodings
=
slim
.
conv2d
(
net
,
num_predictions_per_location
*
self
.
_box_code_size
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
scope
=
'BoxEncodingPredictor'
)
if
self
.
_use_dropout
:
net
=
slim
.
dropout
(
net
,
keep_prob
=
self
.
_dropout_keep_prob
)
if
self
.
_use_depthwise
:
class_predictions_with_background
=
slim
.
separable_conv2d
(
net
,
None
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
padding
=
'SAME'
,
depth_multiplier
=
1
,
stride
=
1
,
rate
=
1
,
scope
=
'ClassPredictor_depthwise'
)
class_predictions_with_background
=
slim
.
conv2d
(
class_predictions_with_background
,
num_predictions_per_location
*
num_class_slots
,
[
1
,
1
],
scope
=
'ClassPredictor'
)
else
:
class_predictions_with_background
=
slim
.
conv2d
(
net
,
num_predictions_per_location
*
num_class_slots
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
scope
=
'ClassPredictor'
,
biases_initializer
=
tf
.
constant_initializer
(
self
.
_class_prediction_bias_init
))
if
self
.
_apply_sigmoid_to_scores
:
class_predictions_with_background
=
tf
.
sigmoid
(
class_predictions_with_background
)
combined_feature_map_shape
=
(
shape_utils
.
combined_static_and_dynamic_shape
(
image_feature
))
box_encodings
=
tf
.
reshape
(
box_encodings
,
tf
.
stack
([
combined_feature_map_shape
[
0
],
combined_feature_map_shape
[
1
]
*
combined_feature_map_shape
[
2
]
*
num_predictions_per_location
,
1
,
self
.
_box_code_size
]))
box_encodings_list
.
append
(
box_encodings
)
class_predictions_with_background
=
tf
.
reshape
(
class_predictions_with_background
,
tf
.
stack
([
combined_feature_map_shape
[
0
],
combined_feature_map_shape
[
1
]
*
combined_feature_map_shape
[
2
]
*
num_predictions_per_location
,
num_class_slots
]))
class_predictions_list
.
append
(
class_predictions_with_background
)
return
{
BOX_ENCODINGS
:
tf
.
concat
(
box_encodings_list
,
axis
=
1
),
CLASS_PREDICTIONS_WITH_BACKGROUND
:
class_predictions_with_background
}
tf
.
concat
(
class_predictions_list
,
axis
=
1
)}
# TODO: Merge the implementation with ConvolutionalBoxPredictor above
# since they are very similar.
class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
  """Convolutional Box Predictor with weight sharing.

  Defines the box predictor as defined in
  https://arxiv.org/abs/1708.02002. This class differs from
  ConvolutionalBoxPredictor in that it shares weights and biases while
  predicting from different feature maps. Separate multi-layer towers are
  constructed for the box encoding and class predictors respectively.
  """

  def __init__(self,
               is_training,
               num_classes,
               conv_hyperparams,
               depth,
               num_layers_before_predictor,
               box_code_size,
               kernel_size=3,
               class_prediction_bias_init=0.0):
    """Constructor.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      num_classes: number of classes.  Note that num_classes *does not*
        include the background category, so if groundtruth labels take values
        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
        assigned classification targets can range from {0,... K}).
      conv_hyperparams: Slim arg_scope with hyperparameters for convolution
        ops.
      depth: depth of conv layers.
      num_layers_before_predictor: Number of the additional conv layers before
        the predictor.
      box_code_size: Size of encoding for each box.
      kernel_size: Size of final convolution kernel.
      class_prediction_bias_init: constant value to initialize bias of the
        last conv2d layer before class prediction.
    """
    super(WeightSharedConvolutionalBoxPredictor, self).__init__(is_training,
                                                                num_classes)
    self._conv_hyperparams = conv_hyperparams
    self._depth = depth
    self._num_layers_before_predictor = num_layers_before_predictor
    self._box_code_size = box_code_size
    self._kernel_size = kernel_size
    self._class_prediction_bias_init = class_prediction_bias_init

  def _predict(self, image_features, num_predictions_per_location_list):
    """Computes encoded object locations and corresponding confidences.

    Args:
      image_features: A list of float tensors of shape [batch_size, height_i,
        width_i, channels] containing features for a batch of images. Note
        that all tensors in the list must have the same number of channels.
      num_predictions_per_location_list: A list of integers representing the
        number of box predictions to be made per spatial location for each
        feature map. Note that all values must be the same since the weights
        are shared.

    Returns:
      A dictionary containing the following tensors.
        box_encodings: A float tensor of shape [batch_size, num_anchors, 1,
          code_size] representing the location of the objects, where
          num_anchors = feat_height * feat_width * num_predictions_per_location
        class_predictions_with_background: A float tensor of shape
          [batch_size, num_anchors, num_classes + 1] representing the class
          predictions for the proposals.

    Raises:
      ValueError: If the image feature maps do not have the same number of
        channels or if the num predictions per locations is differs between
        the feature maps.
    """
    if len(set(num_predictions_per_location_list)) > 1:
      # NOTE: trailing space below is required so the two implicitly
      # concatenated literals do not run together ("…for allfeature maps…").
      raise ValueError('num predictions per location must be same for all '
                       'feature maps, found: {}'.format(
                           num_predictions_per_location_list))
    feature_channels = [
        image_feature.shape[3].value for image_feature in image_features
    ]
    if len(set(feature_channels)) > 1:
      raise ValueError('all feature maps must have the same number of '
                       'channels, found: {}'.format(feature_channels))
    box_encodings_list = []
    class_predictions_list = []
    for (image_feature, num_predictions_per_location) in zip(
        image_features, num_predictions_per_location_list):
      # AUTO_REUSE makes every feature map after the first reuse the same
      # tower variables, which is what makes the predictor weight-shared.
      with tf.variable_scope('WeightSharedConvolutionalBoxPredictor',
                             reuse=tf.AUTO_REUSE):
        # Add a slot for the background class.
        num_class_slots = self.num_classes + 1
        box_encodings_net = image_feature
        class_predictions_net = image_feature
        with slim.arg_scope(self._conv_hyperparams):
          # Box encoding tower followed by its final prediction layer.
          for i in range(self._num_layers_before_predictor):
            box_encodings_net = slim.conv2d(
                box_encodings_net,
                self._depth,
                [self._kernel_size, self._kernel_size],
                stride=1,
                padding='SAME',
                scope='BoxEncodingPredictionTower/conv2d_{}'.format(i))
          box_encodings = slim.conv2d(
              box_encodings_net,
              num_predictions_per_location * self._box_code_size,
              [self._kernel_size, self._kernel_size],
              activation_fn=None,
              stride=1,
              padding='SAME',
              scope='BoxEncodingPredictor')
          # Class prediction tower followed by its final prediction layer.
          for i in range(self._num_layers_before_predictor):
            class_predictions_net = slim.conv2d(
                class_predictions_net,
                self._depth,
                [self._kernel_size, self._kernel_size],
                stride=1,
                padding='SAME',
                scope='ClassPredictionTower/conv2d_{}'.format(i))
          class_predictions_with_background = slim.conv2d(
              class_predictions_net,
              num_predictions_per_location * num_class_slots,
              [self._kernel_size, self._kernel_size],
              activation_fn=None,
              stride=1,
              padding='SAME',
              biases_initializer=tf.constant_initializer(
                  self._class_prediction_bias_init),
              scope='ClassPredictor')
          # Flatten the spatial grid into the anchors dimension so outputs
          # from all feature maps can be concatenated along axis 1.
          combined_feature_map_shape = (
              shape_utils.combined_static_and_dynamic_shape(image_feature))
          box_encodings = tf.reshape(
              box_encodings,
              tf.stack([combined_feature_map_shape[0],
                        combined_feature_map_shape[1] *
                        combined_feature_map_shape[2] *
                        num_predictions_per_location,
                        1,
                        self._box_code_size]))
          box_encodings_list.append(box_encodings)
          class_predictions_with_background = tf.reshape(
              class_predictions_with_background,
              tf.stack([combined_feature_map_shape[0],
                        combined_feature_map_shape[1] *
                        combined_feature_map_shape[2] *
                        num_predictions_per_location,
                        num_class_slots]))
          class_predictions_list.append(class_predictions_with_background)
    return {
        BOX_ENCODINGS: tf.concat(box_encodings_list, axis=1),
        CLASS_PREDICTIONS_WITH_BACKGROUND: tf.concat(
            class_predictions_list, axis=1)
    }
research/object_detection/core/box_predictor_test.py
View file @
fd7b6887
...
...
@@ -14,7 +14,6 @@
# ==============================================================================
"""Tests for object_detection.core.box_predictor."""
import
numpy
as
np
import
tensorflow
as
tf
...
...
@@ -22,6 +21,7 @@ from google.protobuf import text_format
from
object_detection.builders
import
hyperparams_builder
from
object_detection.core
import
box_predictor
from
object_detection.protos
import
hyperparams_pb2
from
object_detection.utils
import
test_case
class
MaskRCNNBoxPredictorTest
(
tf
.
test
.
TestCase
):
...
...
@@ -55,7 +55,8 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
box_code_size
=
4
,
)
box_predictions
=
mask_box_predictor
.
predict
(
image_features
,
num_predictions_per_location
=
1
,
scope
=
'BoxPredictor'
)
[
image_features
],
num_predictions_per_location
=
[
1
],
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
class_predictions_with_background
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
...
...
@@ -93,12 +94,16 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
op_type
=
hyperparams_pb2
.
Hyperparams
.
CONV
),
predict_instance_masks
=
True
)
box_predictions
=
mask_box_predictor
.
predict
(
image_features
,
num_predictions_per_location
=
1
,
scope
=
'BoxPredictor'
)
[
image_features
],
num_predictions_per_location
=
[
1
],
scope
=
'BoxPredictor'
,
predict_boxes_and_classes
=
True
,
predict_auxiliary_outputs
=
True
)
mask_predictions
=
box_predictions
[
box_predictor
.
MASK_PREDICTIONS
]
self
.
assertListEqual
([
2
,
1
,
5
,
14
,
14
],
mask_predictions
.
get_shape
().
as_list
())
def
test_do_not_return_instance_masks_
and_keypoints_
without_request
(
self
):
def
test_do_not_return_instance_masks_without_request
(
self
):
image_features
=
tf
.
random_uniform
([
2
,
7
,
7
,
3
],
dtype
=
tf
.
float32
)
mask_box_predictor
=
box_predictor
.
MaskRCNNBoxPredictor
(
is_training
=
False
,
...
...
@@ -108,7 +113,8 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
dropout_keep_prob
=
0.5
,
box_code_size
=
4
)
box_predictions
=
mask_box_predictor
.
predict
(
image_features
,
num_predictions_per_location
=
1
,
scope
=
'BoxPredictor'
)
[
image_features
],
num_predictions_per_location
=
[
1
],
scope
=
'BoxPredictor'
)
self
.
assertEqual
(
len
(
box_predictions
),
2
)
self
.
assertTrue
(
box_predictor
.
BOX_ENCODINGS
in
box_predictions
)
self
.
assertTrue
(
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
...
...
@@ -156,7 +162,8 @@ class RfcnBoxPredictorTest(tf.test.TestCase):
box_code_size
=
4
)
box_predictions
=
rfcn_box_predictor
.
predict
(
image_features
,
num_predictions_per_location
=
1
,
scope
=
'BoxPredictor'
,
[
image_features
],
num_predictions_per_location
=
[
1
],
scope
=
'BoxPredictor'
,
proposal_boxes
=
proposal_boxes
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
class_predictions_with_background
=
box_predictions
[
...
...
@@ -173,7 +180,7 @@ class RfcnBoxPredictorTest(tf.test.TestCase):
self
.
assertAllEqual
(
class_predictions_shape
,
[
8
,
1
,
3
])
class
ConvolutionalBoxPredictorTest
(
t
f
.
t
est
.
TestCase
):
class
ConvolutionalBoxPredictorTest
(
test
_case
.
TestCase
):
def
_build_arg_scope_with_conv_hyperparams
(
self
):
conv_hyperparams
=
hyperparams_pb2
.
Hyperparams
()
...
...
@@ -192,7 +199,94 @@ class ConvolutionalBoxPredictorTest(tf.test.TestCase):
return
hyperparams_builder
.
build
(
conv_hyperparams
,
is_training
=
True
)
def
test_get_boxes_for_five_aspect_ratios_per_location
(
self
):
image_features
=
tf
.
random_uniform
([
4
,
8
,
8
,
64
],
dtype
=
tf
.
float32
)
def
graph_fn
(
image_features
):
conv_box_predictor
=
box_predictor
.
ConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
0
,
conv_hyperparams
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
[
image_features
],
num_predictions_per_location
=
[
5
],
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
objectness_predictions
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
return
(
box_encodings
,
objectness_predictions
)
image_features
=
np
.
random
.
rand
(
4
,
8
,
8
,
64
).
astype
(
np
.
float32
)
(
box_encodings
,
objectness_predictions
)
=
self
.
execute
(
graph_fn
,
[
image_features
])
self
.
assertAllEqual
(
box_encodings
.
shape
,
[
4
,
320
,
1
,
4
])
self
.
assertAllEqual
(
objectness_predictions
.
shape
,
[
4
,
320
,
1
])
def
test_get_boxes_for_one_aspect_ratio_per_location
(
self
):
def
graph_fn
(
image_features
):
conv_box_predictor
=
box_predictor
.
ConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
0
,
conv_hyperparams
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
[
image_features
],
num_predictions_per_location
=
[
1
],
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
objectness_predictions
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
return
(
box_encodings
,
objectness_predictions
)
image_features
=
np
.
random
.
rand
(
4
,
8
,
8
,
64
).
astype
(
np
.
float32
)
(
box_encodings
,
objectness_predictions
)
=
self
.
execute
(
graph_fn
,
[
image_features
])
self
.
assertAllEqual
(
box_encodings
.
shape
,
[
4
,
64
,
1
,
4
])
self
.
assertAllEqual
(
objectness_predictions
.
shape
,
[
4
,
64
,
1
])
def
test_get_multi_class_predictions_for_five_aspect_ratios_per_location
(
self
):
num_classes_without_background
=
6
image_features
=
np
.
random
.
rand
(
4
,
8
,
8
,
64
).
astype
(
np
.
float32
)
def
graph_fn
(
image_features
):
conv_box_predictor
=
box_predictor
.
ConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
num_classes_without_background
,
conv_hyperparams
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
[
image_features
],
num_predictions_per_location
=
[
5
],
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
class_predictions_with_background
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
return
(
box_encodings
,
class_predictions_with_background
)
(
box_encodings
,
class_predictions_with_background
)
=
self
.
execute
(
graph_fn
,
[
image_features
])
self
.
assertAllEqual
(
box_encodings
.
shape
,
[
4
,
320
,
1
,
4
])
self
.
assertAllEqual
(
class_predictions_with_background
.
shape
,
[
4
,
320
,
num_classes_without_background
+
1
])
def
test_get_predictions_with_feature_maps_of_dynamic_shape
(
self
):
image_features
=
tf
.
placeholder
(
dtype
=
tf
.
float32
,
shape
=
[
4
,
None
,
None
,
64
])
conv_box_predictor
=
box_predictor
.
ConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
0
,
...
...
@@ -206,22 +300,38 @@ class ConvolutionalBoxPredictorTest(tf.test.TestCase):
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
image_features
,
num_predictions_per_location
=
5
,
scope
=
'BoxPredictor'
)
[
image_features
],
num_predictions_per_location
=
[
5
],
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
objectness_predictions
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
init_op
=
tf
.
global_variables_initializer
()
resolution
=
32
expected_num_anchors
=
resolution
*
resolution
*
5
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
(
box_encodings_shape
,
objectness_predictions_shape
)
=
sess
.
run
(
[
tf
.
shape
(
box_encodings
),
tf
.
shape
(
objectness_predictions
)])
self
.
assertAllEqual
(
box_encodings_shape
,
[
4
,
320
,
1
,
4
])
self
.
assertAllEqual
(
objectness_predictions_shape
,
[
4
,
320
,
1
])
[
tf
.
shape
(
box_encodings
),
tf
.
shape
(
objectness_predictions
)],
feed_dict
=
{
image_features
:
np
.
random
.
rand
(
4
,
resolution
,
resolution
,
64
)})
actual_variable_set
=
set
(
[
var
.
op
.
name
for
var
in
tf
.
trainable_variables
()])
self
.
assertAllEqual
(
box_encodings_shape
,
[
4
,
expected_num_anchors
,
1
,
4
])
self
.
assertAllEqual
(
objectness_predictions_shape
,
[
4
,
expected_num_anchors
,
1
])
expected_variable_set
=
set
([
'BoxPredictor/Conv2d_0_1x1_32/biases'
,
'BoxPredictor/Conv2d_0_1x1_32/weights'
,
'BoxPredictor/BoxEncodingPredictor/biases'
,
'BoxPredictor/BoxEncodingPredictor/weights'
,
'BoxPredictor/ClassPredictor/biases'
,
'BoxPredictor/ClassPredictor/weights'
])
self
.
assertEqual
(
expected_variable_set
,
actual_variable_set
)
def
test_
get_boxes_for_one_aspect_ratio_per_loca
tion
(
self
):
image_features
=
tf
.
random_uniform
([
4
,
8
,
8
,
64
],
dtype
=
tf
.
float32
)
def
test_
use_depthwise_convolu
tion
(
self
):
image_features
=
tf
.
placeholder
(
dtype
=
tf
.
float32
,
shape
=
[
4
,
None
,
None
,
64
]
)
conv_box_predictor
=
box_predictor
.
ConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
0
,
...
...
@@ -229,77 +339,210 @@ class ConvolutionalBoxPredictorTest(tf.test.TestCase):
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
box_code_size
=
4
,
use_dropout
=
True
,
use_depthwise
=
True
)
box_predictions
=
conv_box_predictor
.
predict
(
image_features
,
num_predictions_per_location
=
1
,
scope
=
'BoxPredictor'
)
[
image_features
],
num_predictions_per_location
=
[
5
],
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
objectness_predictions
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
init_op
=
tf
.
global_variables_initializer
()
resolution
=
32
expected_num_anchors
=
resolution
*
resolution
*
5
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
(
box_encodings_shape
,
objectness_predictions_shape
)
=
sess
.
run
(
[
tf
.
shape
(
box_encodings
),
tf
.
shape
(
objectness_predictions
)])
self
.
assertAllEqual
(
box_encodings_shape
,
[
4
,
64
,
1
,
4
])
self
.
assertAllEqual
(
objectness_predictions_shape
,
[
4
,
64
,
1
])
[
tf
.
shape
(
box_encodings
),
tf
.
shape
(
objectness_predictions
)],
feed_dict
=
{
image_features
:
np
.
random
.
rand
(
4
,
resolution
,
resolution
,
64
)})
actual_variable_set
=
set
(
[
var
.
op
.
name
for
var
in
tf
.
trainable_variables
()])
self
.
assertAllEqual
(
box_encodings_shape
,
[
4
,
expected_num_anchors
,
1
,
4
])
self
.
assertAllEqual
(
objectness_predictions_shape
,
[
4
,
expected_num_anchors
,
1
])
expected_variable_set
=
set
([
'BoxPredictor/Conv2d_0_1x1_32/biases'
,
'BoxPredictor/Conv2d_0_1x1_32/weights'
,
'BoxPredictor/BoxEncodingPredictor_depthwise/biases'
,
'BoxPredictor/BoxEncodingPredictor_depthwise/depthwise_weights'
,
'BoxPredictor/BoxEncodingPredictor/biases'
,
'BoxPredictor/BoxEncodingPredictor/weights'
,
'BoxPredictor/ClassPredictor_depthwise/biases'
,
'BoxPredictor/ClassPredictor_depthwise/depthwise_weights'
,
'BoxPredictor/ClassPredictor/biases'
,
'BoxPredictor/ClassPredictor/weights'
])
self
.
assertEqual
(
expected_variable_set
,
actual_variable_set
)
class
WeightSharedConvolutionalBoxPredictorTest
(
test_case
.
TestCase
):
  def _build_arg_scope_with_conv_hyperparams(self):
    """Builds a Slim arg_scope from a fixed conv Hyperparams text proto.

    Returns a training-mode arg_scope with RELU_6 activation, L2
    regularization and truncated-normal initialization, used by the
    predictor-construction tests in this class.
    """
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    # Minimal hyperparams: defaults are used for the regularizer and
    # initializer sub-messages.
    conv_hyperparams_text_proto = """
      activation: RELU_6
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
    """
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.build(conv_hyperparams, is_training=True)
  def test_get_boxes_for_five_aspect_ratios_per_location(self):
    """Checks output shapes for class-agnostic prediction (num_classes=0)."""

    def graph_fn(image_features):
      conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
          is_training=False,
          num_classes=0,
          conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
          depth=32,
          num_layers_before_predictor=1,
          box_code_size=4)
      box_predictions = conv_box_predictor.predict(
          [image_features], num_predictions_per_location=[5],
          scope='BoxPredictor')
      box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
      objectness_predictions = box_predictions[
          box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
      return (box_encodings, objectness_predictions)

    image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
    (box_encodings, objectness_predictions) = self.execute(
        graph_fn, [image_features])
    # 8 * 8 spatial locations * 5 predictions per location = 320 anchors.
    self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
    # num_classes=0 leaves a single (background) class slot.
    self.assertAllEqual(objectness_predictions.shape, [4, 320, 1])
def
test_get_multi_class_predictions_for_five_aspect_ratios_per_location
(
self
):
num_classes_without_background
=
6
image_features
=
tf
.
random_uniform
([
4
,
8
,
8
,
64
],
dtype
=
tf
.
float32
)
conv_box_predictor
=
box_predictor
.
ConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
num_classes_without_background
,
conv_hyperparams
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
image_features
,
num_predictions_per_location
=
5
,
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
class_predictions_with_background
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
def
graph_fn
(
image_features
):
conv_box_predictor
=
box_predictor
.
WeightSharedConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
num_classes_without_background
,
conv_hyperparams
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
depth
=
32
,
num_layers_before_predictor
=
1
,
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
[
image_features
],
num_predictions_per_location
=
[
5
],
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
class_predictions_with_background
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
return
(
box_encodings
,
class_predictions_with_background
)
init_op
=
tf
.
global_variables_initializer
()
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
(
box_encodings_shape
,
class_predictions_with_background_shape
)
=
sess
.
run
([
tf
.
shape
(
box_encodings
),
tf
.
shape
(
class_predictions_with_background
)])
self
.
assertAllEqual
(
box_encodings_shape
,
[
4
,
320
,
1
,
4
])
self
.
assertAllEqual
(
class_predictions_with_background_shape
,
[
4
,
320
,
num_classes_without_background
+
1
])
def
test_get_boxes_for_five_aspect_ratios_per_location_fully_convolutional
(
image_features
=
np
.
random
.
rand
(
4
,
8
,
8
,
64
).
astype
(
np
.
float32
)
(
box_encodings
,
class_predictions_with_background
)
=
self
.
execute
(
graph_fn
,
[
image_features
])
self
.
assertAllEqual
(
box_encodings
.
shape
,
[
4
,
320
,
1
,
4
])
self
.
assertAllEqual
(
class_predictions_with_background
.
shape
,
[
4
,
320
,
num_classes_without_background
+
1
])
def
test_get_multi_class_predictions_from_two_feature_maps
(
self
):
num_classes_without_background
=
6
def
graph_fn
(
image_features1
,
image_features2
):
conv_box_predictor
=
box_predictor
.
WeightSharedConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
num_classes_without_background
,
conv_hyperparams
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
depth
=
32
,
num_layers_before_predictor
=
1
,
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
[
image_features1
,
image_features2
],
num_predictions_per_location
=
[
5
,
5
],
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
class_predictions_with_background
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
return
(
box_encodings
,
class_predictions_with_background
)
image_features1
=
np
.
random
.
rand
(
4
,
8
,
8
,
64
).
astype
(
np
.
float32
)
image_features2
=
np
.
random
.
rand
(
4
,
8
,
8
,
64
).
astype
(
np
.
float32
)
(
box_encodings
,
class_predictions_with_background
)
=
self
.
execute
(
graph_fn
,
[
image_features1
,
image_features2
])
self
.
assertAllEqual
(
box_encodings
.
shape
,
[
4
,
640
,
1
,
4
])
self
.
assertAllEqual
(
class_predictions_with_background
.
shape
,
[
4
,
640
,
num_classes_without_background
+
1
])
def
test_predictions_from_multiple_feature_maps_share_weights
(
self
):
num_classes_without_background
=
6
def
graph_fn
(
image_features1
,
image_features2
):
conv_box_predictor
=
box_predictor
.
WeightSharedConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
num_classes_without_background
,
conv_hyperparams
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
depth
=
32
,
num_layers_before_predictor
=
2
,
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
[
image_features1
,
image_features2
],
num_predictions_per_location
=
[
5
,
5
],
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
class_predictions_with_background
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
return
(
box_encodings
,
class_predictions_with_background
)
with
self
.
test_session
(
graph
=
tf
.
Graph
()):
graph_fn
(
tf
.
random_uniform
([
4
,
32
,
32
,
3
],
dtype
=
tf
.
float32
),
tf
.
random_uniform
([
4
,
16
,
16
,
3
],
dtype
=
tf
.
float32
))
actual_variable_set
=
set
(
[
var
.
op
.
name
for
var
in
tf
.
trainable_variables
()])
expected_variable_set
=
set
([
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_0/weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_0/biases'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_1/weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_1/biases'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/biases'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/biases'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictor/weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictor/biases'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/biases'
)])
self
.
assertEqual
(
expected_variable_set
,
actual_variable_set
)
def
test_get_predictions_with_feature_maps_of_dynamic_shape
(
self
):
image_features
=
tf
.
placeholder
(
dtype
=
tf
.
float32
,
shape
=
[
4
,
None
,
None
,
64
])
conv_box_predictor
=
box_predictor
.
ConvolutionalBoxPredictor
(
conv_box_predictor
=
box_predictor
.
WeightShared
ConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
0
,
conv_hyperparams
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
min_depth
=
0
,
max_depth
=
32
,
depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
)
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
image_features
,
num_predictions_per_location
=
5
,
scope
=
'BoxPredictor'
)
[
image_features
],
num_predictions_per_location
=
[
5
],
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
objectness_predictions
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
...
...
@@ -318,6 +561,5 @@ class ConvolutionalBoxPredictorTest(tf.test.TestCase):
self
.
assertAllEqual
(
objectness_predictions_shape
,
[
4
,
expected_num_anchors
,
1
])
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
research/object_detection/core/losses.py
View file @
fd7b6887
...
...
@@ -50,8 +50,10 @@ class Loss(object):
"""Call the loss function.
Args:
prediction_tensor: a tensor representing predicted quantities.
target_tensor: a tensor representing regression or classification targets.
prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
representing predicted quantities.
target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
regression or classification targets.
ignore_nan_targets: whether to ignore nan targets in the loss computation.
E.g. can be used if the target tensor is missing groundtruth data that
shouldn't be factored into the loss.
...
...
@@ -81,7 +83,8 @@ class Loss(object):
the Loss.
Returns:
loss: a tensor representing the value of the loss function
loss: an N-d tensor of shape [batch, anchors, ...] containing the loss per
anchor
"""
pass
...
...
@@ -92,15 +95,6 @@ class WeightedL2LocalizationLoss(Loss):
Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
"""
def
__init__
(
self
,
anchorwise_output
=
False
):
"""Constructor.
Args:
anchorwise_output: Outputs loss per anchor. (default False)
"""
self
.
_anchorwise_output
=
anchorwise_output
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
weights
):
"""Compute loss function.
...
...
@@ -112,15 +106,13 @@ class WeightedL2LocalizationLoss(Loss):
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a
(scalar) tensor representing the value of the loss function
or a float tensor of shape [batch_size, num_anchors]
loss: a
float tensor of shape [batch_size, num_anchors] tensor
representing the value of the loss function.
"""
weighted_diff
=
(
prediction_tensor
-
target_tensor
)
*
tf
.
expand_dims
(
weights
,
2
)
square_diff
=
0.5
*
tf
.
square
(
weighted_diff
)
if
self
.
_anchorwise_output
:
return
tf
.
reduce_sum
(
square_diff
,
2
)
return
tf
.
reduce_sum
(
square_diff
)
return
tf
.
reduce_sum
(
square_diff
,
2
)
class
WeightedSmoothL1LocalizationLoss
(
Loss
):
...
...
@@ -132,15 +124,6 @@ class WeightedSmoothL1LocalizationLoss(Loss):
See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
"""
def
__init__
(
self
,
anchorwise_output
=
False
):
"""Constructor.
Args:
anchorwise_output: Outputs loss per anchor. (default False)
"""
self
.
_anchorwise_output
=
anchorwise_output
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
weights
):
"""Compute loss function.
...
...
@@ -152,7 +135,8 @@ class WeightedSmoothL1LocalizationLoss(Loss):
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a (scalar) tensor representing the value of the loss function
loss: a float tensor of shape [batch_size, num_anchors] tensor
representing the value of the loss function.
"""
diff
=
prediction_tensor
-
target_tensor
abs_diff
=
tf
.
abs
(
diff
)
...
...
@@ -160,9 +144,7 @@ class WeightedSmoothL1LocalizationLoss(Loss):
anchorwise_smooth_l1norm
=
tf
.
reduce_sum
(
tf
.
where
(
abs_diff_lt_1
,
0.5
*
tf
.
square
(
abs_diff
),
abs_diff
-
0.5
),
2
)
*
weights
if
self
.
_anchorwise_output
:
return
anchorwise_smooth_l1norm
return
tf
.
reduce_sum
(
anchorwise_smooth_l1norm
)
return
anchorwise_smooth_l1norm
class
WeightedIOULocalizationLoss
(
Loss
):
...
...
@@ -184,27 +166,19 @@ class WeightedIOULocalizationLoss(Loss):
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a (scalar) tensor representing the value of the loss function
loss: a float tensor of shape [batch_size, num_anchors] tensor
representing the value of the loss function.
"""
predicted_boxes
=
box_list
.
BoxList
(
tf
.
reshape
(
prediction_tensor
,
[
-
1
,
4
]))
target_boxes
=
box_list
.
BoxList
(
tf
.
reshape
(
target_tensor
,
[
-
1
,
4
]))
per_anchor_iou_loss
=
1.0
-
box_list_ops
.
matched_iou
(
predicted_boxes
,
target_boxes
)
return
tf
.
reduce_sum
(
tf
.
reshape
(
weights
,
[
-
1
])
*
per_anchor_iou_loss
)
return
tf
.
reshape
(
weights
,
[
-
1
])
*
per_anchor_iou_loss
class
WeightedSigmoidClassificationLoss
(
Loss
):
"""Sigmoid cross entropy classification loss function."""
def
__init__
(
self
,
anchorwise_output
=
False
):
"""Constructor.
Args:
anchorwise_output: Outputs loss per anchor. (default False)
"""
self
.
_anchorwise_output
=
anchorwise_output
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
...
...
@@ -222,8 +196,8 @@ class WeightedSigmoidClassificationLoss(Loss):
If provided, computes loss only for the specified class indices.
Returns:
loss: a
(scalar) tensor representing the value of the loss function
or a float tensor of shape [batch_size, num_anchors]
loss: a
float tensor of shape [batch_size, num_anchors, num_classes]
representing the value of the loss function.
"""
weights
=
tf
.
expand_dims
(
weights
,
2
)
if
class_indices
is
not
None
:
...
...
@@ -233,9 +207,7 @@ class WeightedSigmoidClassificationLoss(Loss):
[
1
,
1
,
-
1
])
per_entry_cross_ent
=
(
tf
.
nn
.
sigmoid_cross_entropy_with_logits
(
labels
=
target_tensor
,
logits
=
prediction_tensor
))
if
self
.
_anchorwise_output
:
return
tf
.
reduce_sum
(
per_entry_cross_ent
*
weights
,
2
)
return
tf
.
reduce_sum
(
per_entry_cross_ent
*
weights
)
return
per_entry_cross_ent
*
weights
class
SigmoidFocalClassificationLoss
(
Loss
):
...
...
@@ -245,15 +217,13 @@ class SigmoidFocalClassificationLoss(Loss):
examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition.
"""
def
__init__
(
self
,
anchorwise_output
=
False
,
gamma
=
2.0
,
alpha
=
0.25
):
def
__init__
(
self
,
gamma
=
2.0
,
alpha
=
0.25
):
"""Constructor.
Args:
anchorwise_output: Outputs loss per anchor. (default False)
gamma: exponent of the modulating factor (1 - p_t) ^ gamma.
alpha: optional alpha weighting factor to balance positives vs negatives.
"""
self
.
_anchorwise_output
=
anchorwise_output
self
.
_alpha
=
alpha
self
.
_gamma
=
gamma
...
...
@@ -274,8 +244,8 @@ class SigmoidFocalClassificationLoss(Loss):
If provided, computes loss only for the specified class indices.
Returns:
loss: a
(scalar) tensor representing the value of the loss function
or a float tensor of shape [batch_size, num_anchors]
loss: a
float tensor of shape [batch_size, num_anchors, num_classes]
representing the value of the loss function.
"""
weights
=
tf
.
expand_dims
(
weights
,
2
)
if
class_indices
is
not
None
:
...
...
@@ -297,25 +267,21 @@ class SigmoidFocalClassificationLoss(Loss):
(
1
-
target_tensor
)
*
(
1
-
self
.
_alpha
))
focal_cross_entropy_loss
=
(
modulating_factor
*
alpha_weight_factor
*
per_entry_cross_ent
)
if
self
.
_anchorwise_output
:
return
tf
.
reduce_sum
(
focal_cross_entropy_loss
*
weights
,
2
)
return
tf
.
reduce_sum
(
focal_cross_entropy_loss
*
weights
)
return
focal_cross_entropy_loss
*
weights
class
WeightedSoftmaxClassificationLoss
(
Loss
):
"""Softmax loss function."""
def
__init__
(
self
,
anchorwise_output
=
False
,
logit_scale
=
1.0
):
def
__init__
(
self
,
logit_scale
=
1.0
):
"""Constructor.
Args:
anchorwise_output: Whether to output loss per anchor (default False)
logit_scale: When this value is high, the prediction is "diffused" and
when this value is low, the prediction is made peakier.
(default 1.0)
"""
self
.
_anchorwise_output
=
anchorwise_output
self
.
_logit_scale
=
logit_scale
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
weights
):
...
...
@@ -329,7 +295,8 @@ class WeightedSoftmaxClassificationLoss(Loss):
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a (scalar) tensor representing the value of the loss function
loss: a float tensor of shape [batch_size, num_anchors]
representing the value of the loss function.
"""
num_classes
=
prediction_tensor
.
get_shape
().
as_list
()[
-
1
]
prediction_tensor
=
tf
.
divide
(
...
...
@@ -337,9 +304,7 @@ class WeightedSoftmaxClassificationLoss(Loss):
per_row_cross_ent
=
(
tf
.
nn
.
softmax_cross_entropy_with_logits
(
labels
=
tf
.
reshape
(
target_tensor
,
[
-
1
,
num_classes
]),
logits
=
tf
.
reshape
(
prediction_tensor
,
[
-
1
,
num_classes
])))
if
self
.
_anchorwise_output
:
return
tf
.
reshape
(
per_row_cross_ent
,
tf
.
shape
(
weights
))
*
weights
return
tf
.
reduce_sum
(
per_row_cross_ent
*
tf
.
reshape
(
weights
,
[
-
1
]))
return
tf
.
reshape
(
per_row_cross_ent
,
tf
.
shape
(
weights
))
*
weights
class
BootstrappedSigmoidClassificationLoss
(
Loss
):
...
...
@@ -359,14 +324,13 @@ class BootstrappedSigmoidClassificationLoss(Loss):
Reed et al. (ICLR 2015).
"""
def
__init__
(
self
,
alpha
,
bootstrap_type
=
'soft'
,
anchorwise_output
=
False
):
def
__init__
(
self
,
alpha
,
bootstrap_type
=
'soft'
):
"""Constructor.
Args:
alpha: a float32 scalar tensor between 0 and 1 representing interpolation
weight
bootstrap_type: set to either 'hard' or 'soft' (default)
anchorwise_output: Outputs loss per anchor. (default False)
Raises:
ValueError: if bootstrap_type is not either 'hard' or 'soft'
...
...
@@ -376,7 +340,6 @@ class BootstrappedSigmoidClassificationLoss(Loss):
'
\'
hard
\'
or
\'
soft.
\'
'
)
self
.
_alpha
=
alpha
self
.
_bootstrap_type
=
bootstrap_type
self
.
_anchorwise_output
=
anchorwise_output
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
weights
):
"""Compute loss function.
...
...
@@ -389,8 +352,8 @@ class BootstrappedSigmoidClassificationLoss(Loss):
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a
(scalar) tensor representing the value of the loss function
or a float tensor of shape [batch_size, num_anchors]
loss: a
float tensor of shape [batch_size, num_anchors, num_classes]
representing the value of the loss function.
"""
if
self
.
_bootstrap_type
==
'soft'
:
bootstrap_target_tensor
=
self
.
_alpha
*
target_tensor
+
(
...
...
@@ -401,9 +364,7 @@ class BootstrappedSigmoidClassificationLoss(Loss):
tf
.
sigmoid
(
prediction_tensor
)
>
0.5
,
tf
.
float32
)
per_entry_cross_ent
=
(
tf
.
nn
.
sigmoid_cross_entropy_with_logits
(
labels
=
bootstrap_target_tensor
,
logits
=
prediction_tensor
))
if
self
.
_anchorwise_output
:
return
tf
.
reduce_sum
(
per_entry_cross_ent
*
tf
.
expand_dims
(
weights
,
2
),
2
)
return
tf
.
reduce_sum
(
per_entry_cross_ent
*
tf
.
expand_dims
(
weights
,
2
))
return
per_entry_cross_ent
*
tf
.
expand_dims
(
weights
,
2
)
class
HardExampleMiner
(
object
):
...
...
research/object_detection/core/losses_test.py
View file @
fd7b6887
...
...
@@ -26,7 +26,7 @@ from object_detection.core import matcher
class
WeightedL2LocalizationLossTest
(
tf
.
test
.
TestCase
):
def
testReturnsCorrectLoss
(
self
):
def
testReturnsCorrect
Weighted
Loss
(
self
):
batch_size
=
3
num_anchors
=
10
code_size
=
4
...
...
@@ -36,7 +36,8 @@ class WeightedL2LocalizationLossTest(tf.test.TestCase):
[
1
,
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
,
0
],
[
1
,
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
,
0
]],
tf
.
float32
)
loss_op
=
losses
.
WeightedL2LocalizationLoss
()
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
loss
=
tf
.
reduce_sum
(
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
))
expected_loss
=
(
3
*
5
*
4
)
/
2.0
with
self
.
test_session
()
as
sess
:
...
...
@@ -50,7 +51,7 @@ class WeightedL2LocalizationLossTest(tf.test.TestCase):
prediction_tensor
=
tf
.
ones
([
batch_size
,
num_anchors
,
code_size
])
target_tensor
=
tf
.
zeros
([
batch_size
,
num_anchors
,
code_size
])
weights
=
tf
.
ones
([
batch_size
,
num_anchors
])
loss_op
=
losses
.
WeightedL2LocalizationLoss
(
anchorwise_output
=
True
)
loss_op
=
losses
.
WeightedL2LocalizationLoss
()
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
expected_loss
=
np
.
ones
((
batch_size
,
num_anchors
))
*
2
...
...
@@ -58,22 +59,6 @@ class WeightedL2LocalizationLossTest(tf.test.TestCase):
loss_output
=
sess
.
run
(
loss
)
self
.
assertAllClose
(
loss_output
,
expected_loss
)
def
testReturnsCorrectLossSum
(
self
):
batch_size
=
3
num_anchors
=
16
code_size
=
4
prediction_tensor
=
tf
.
ones
([
batch_size
,
num_anchors
,
code_size
])
target_tensor
=
tf
.
zeros
([
batch_size
,
num_anchors
,
code_size
])
weights
=
tf
.
ones
([
batch_size
,
num_anchors
])
loss_op
=
losses
.
WeightedL2LocalizationLoss
(
anchorwise_output
=
False
)
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
expected_loss
=
tf
.
nn
.
l2_loss
(
prediction_tensor
-
target_tensor
)
with
self
.
test_session
()
as
sess
:
loss_output
=
sess
.
run
(
loss
)
expected_loss_output
=
sess
.
run
(
expected_loss
)
self
.
assertAllClose
(
loss_output
,
expected_loss_output
)
def
testReturnsCorrectNanLoss
(
self
):
batch_size
=
3
num_anchors
=
10
...
...
@@ -87,6 +72,7 @@ class WeightedL2LocalizationLossTest(tf.test.TestCase):
loss_op
=
losses
.
WeightedL2LocalizationLoss
()
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
,
ignore_nan_targets
=
True
)
loss
=
tf
.
reduce_sum
(
loss
)
expected_loss
=
(
3
*
5
*
4
)
/
2.0
with
self
.
test_session
()
as
sess
:
...
...
@@ -111,6 +97,7 @@ class WeightedSmoothL1LocalizationLossTest(tf.test.TestCase):
[
0
,
3
,
0
]],
tf
.
float32
)
loss_op
=
losses
.
WeightedSmoothL1LocalizationLoss
()
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
loss
=
tf
.
reduce_sum
(
loss
)
exp_loss
=
7.695
with
self
.
test_session
()
as
sess
:
...
...
@@ -130,6 +117,7 @@ class WeightedIOULocalizationLossTest(tf.test.TestCase):
weights
=
[[
1.0
,
.
5
,
2.0
]]
loss_op
=
losses
.
WeightedIOULocalizationLoss
()
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
loss
=
tf
.
reduce_sum
(
loss
)
exp_loss
=
2.0
with
self
.
test_session
()
as
sess
:
loss_output
=
sess
.
run
(
loss
)
...
...
@@ -159,6 +147,7 @@ class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
[
1
,
1
,
1
,
0
]],
tf
.
float32
)
loss_op
=
losses
.
WeightedSigmoidClassificationLoss
()
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
loss
=
tf
.
reduce_sum
(
loss
)
exp_loss
=
-
2
*
math
.
log
(.
5
)
with
self
.
test_session
()
as
sess
:
...
...
@@ -184,8 +173,9 @@ class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
0
]],
tf
.
float32
)
loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
True
)
loss_op
=
losses
.
WeightedSigmoidClassificationLoss
()
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
loss
=
tf
.
reduce_sum
(
loss
,
axis
=
2
)
exp_loss
=
np
.
matrix
([[
0
,
0
,
-
math
.
log
(.
5
),
0
],
[
-
math
.
log
(.
5
),
0
,
0
,
0
]])
...
...
@@ -214,9 +204,10 @@ class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
[
1
,
1
,
1
,
0
]],
tf
.
float32
)
# Ignores the last class.
class_indices
=
tf
.
constant
([
0
,
1
,
2
],
tf
.
int32
)
loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
True
)
loss_op
=
losses
.
WeightedSigmoidClassificationLoss
()
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
,
class_indices
=
class_indices
)
loss
=
tf
.
reduce_sum
(
loss
,
axis
=
2
)
exp_loss
=
np
.
matrix
([[
0
,
0
,
-
math
.
log
(.
5
),
0
],
[
-
math
.
log
(.
5
),
0
,
0
,
0
]])
...
...
@@ -245,14 +236,13 @@ class SigmoidFocalClassificationLossTest(tf.test.TestCase):
[
0
],
[
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
gamma
=
2.0
,
alpha
=
None
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
gamma
=
2.0
,
alpha
=
None
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
()
focal_loss
=
tf
.
reduce_sum
(
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
),
axis
=
2
)
sigmoid_loss
=
tf
.
reduce_sum
(
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
),
axis
=
2
)
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
...
...
@@ -272,14 +262,13 @@ class SigmoidFocalClassificationLossTest(tf.test.TestCase):
[
0
],
[
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
gamma
=
2.0
,
alpha
=
None
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
gamma
=
2.0
,
alpha
=
None
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
()
focal_loss
=
tf
.
reduce_sum
(
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
),
axis
=
2
)
sigmoid_loss
=
tf
.
reduce_sum
(
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
),
axis
=
2
)
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
...
...
@@ -299,14 +288,13 @@ class SigmoidFocalClassificationLossTest(tf.test.TestCase):
[
0
],
[
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
False
,
gamma
=
2.0
,
alpha
=
None
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
False
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
gamma
=
2.0
,
alpha
=
None
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
()
focal_loss
=
tf
.
reduce_sum
(
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
))
sigmoid_loss
=
tf
.
reduce_sum
(
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
))
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
...
...
@@ -326,14 +314,13 @@ class SigmoidFocalClassificationLossTest(tf.test.TestCase):
[
0
],
[
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
gamma
=
2.0
,
alpha
=
1.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
gamma
=
2.0
,
alpha
=
1.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
()
focal_loss
=
tf
.
reduce_sum
(
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
),
axis
=
2
)
sigmoid_loss
=
tf
.
reduce_sum
(
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
),
axis
=
2
)
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
...
...
@@ -355,14 +342,13 @@ class SigmoidFocalClassificationLossTest(tf.test.TestCase):
[
0
],
[
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
gamma
=
2.0
,
alpha
=
0.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
gamma
=
2.0
,
alpha
=
0.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
()
focal_loss
=
tf
.
reduce_sum
(
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
),
axis
=
2
)
sigmoid_loss
=
tf
.
reduce_sum
(
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
),
axis
=
2
)
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
...
...
@@ -391,10 +377,8 @@ class SigmoidFocalClassificationLossTest(tf.test.TestCase):
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
0
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
alpha
=
0.5
,
gamma
=
0.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
alpha
=
0.5
,
gamma
=
0.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
()
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
...
...
@@ -423,10 +407,8 @@ class SigmoidFocalClassificationLossTest(tf.test.TestCase):
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
0
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
alpha
=
None
,
gamma
=
0.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
alpha
=
None
,
gamma
=
0.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
()
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
...
...
@@ -456,11 +438,10 @@ class SigmoidFocalClassificationLossTest(tf.test.TestCase):
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
False
,
alpha
=
1.0
,
gamma
=
0.0
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
alpha
=
1.0
,
gamma
=
0.0
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
focal_loss
=
tf
.
reduce_sum
(
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
)
with
self
.
test_session
()
as
sess
:
focal_loss
=
sess
.
run
(
focal_loss
)
self
.
assertAllClose
(
...
...
@@ -489,11 +470,10 @@ class SigmoidFocalClassificationLossTest(tf.test.TestCase):
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
False
,
alpha
=
0.75
,
gamma
=
0.0
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
alpha
=
0.75
,
gamma
=
0.0
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
focal_loss
=
tf
.
reduce_sum
(
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
)
with
self
.
test_session
()
as
sess
:
focal_loss
=
sess
.
run
(
focal_loss
)
self
.
assertAllClose
(
...
...
@@ -528,6 +508,7 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
[
1
,
1
,
1
,
0
]],
tf
.
float32
)
loss_op
=
losses
.
WeightedSoftmaxClassificationLoss
()
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
loss
=
tf
.
reduce_sum
(
loss
)
exp_loss
=
-
1.5
*
math
.
log
(.
5
)
with
self
.
test_session
()
as
sess
:
...
...
@@ -553,7 +534,7 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
.
5
,
1
],
[
1
,
1
,
1
,
0
]],
tf
.
float32
)
loss_op
=
losses
.
WeightedSoftmaxClassificationLoss
(
True
)
loss_op
=
losses
.
WeightedSoftmaxClassificationLoss
()
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
exp_loss
=
np
.
matrix
([[
0
,
0
,
-
0.5
*
math
.
log
(.
5
),
0
],
...
...
@@ -564,7 +545,7 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
def
testReturnsCorrectAnchorWiseLossWithHighLogitScaleSetting
(
self
):
"""At very high logit_scale, all predictions will be ~0.33."""
# TODO
(yonib)
: Also test logit_scale with anchorwise=False.
# TODO: Also test logit_scale with anchorwise=False.
logit_scale
=
10e16
prediction_tensor
=
tf
.
constant
([[[
-
100
,
100
,
-
100
],
[
100
,
-
100
,
-
100
],
...
...
@@ -584,8 +565,7 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
1
]],
tf
.
float32
)
loss_op
=
losses
.
WeightedSoftmaxClassificationLoss
(
anchorwise_output
=
True
,
logit_scale
=
logit_scale
)
loss_op
=
losses
.
WeightedSoftmaxClassificationLoss
(
logit_scale
=
logit_scale
)
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
uniform_distribution_loss
=
-
math
.
log
(.
33333333333
)
...
...
@@ -621,6 +601,7 @@ class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):
loss_op
=
losses
.
BootstrappedSigmoidClassificationLoss
(
alpha
,
bootstrap_type
=
'soft'
)
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
loss
=
tf
.
reduce_sum
(
loss
)
exp_loss
=
-
math
.
log
(.
5
)
with
self
.
test_session
()
as
sess
:
loss_output
=
sess
.
run
(
loss
)
...
...
@@ -649,6 +630,7 @@ class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):
loss_op
=
losses
.
BootstrappedSigmoidClassificationLoss
(
alpha
,
bootstrap_type
=
'hard'
)
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
loss
=
tf
.
reduce_sum
(
loss
)
exp_loss
=
-
math
.
log
(.
5
)
with
self
.
test_session
()
as
sess
:
loss_output
=
sess
.
run
(
loss
)
...
...
@@ -675,9 +657,9 @@ class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):
[
1
,
1
,
1
,
0
]],
tf
.
float32
)
alpha
=
tf
.
constant
(.
5
,
tf
.
float32
)
loss_op
=
losses
.
BootstrappedSigmoidClassificationLoss
(
alpha
,
bootstrap_type
=
'hard'
,
anchorwise_output
=
True
)
alpha
,
bootstrap_type
=
'hard'
)
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
loss
=
tf
.
reduce_sum
(
loss
,
axis
=
2
)
exp_loss
=
np
.
matrix
([[
0
,
0
,
-
math
.
log
(.
5
),
0
],
[
-
math
.
log
(.
5
),
0
,
0
,
0
]])
with
self
.
test_session
()
as
sess
:
...
...
research/object_detection/core/matcher.py
View file @
fd7b6887
...
...
@@ -36,6 +36,8 @@ from abc import abstractmethod
import
tensorflow
as
tf
from
object_detection.utils
import
ops
class
Match
(
object
):
"""Class to store results from the matcher.
...
...
@@ -44,7 +46,7 @@ class Match(object):
convenient methods to query the matching results.
"""
def
__init__
(
self
,
match_results
):
def
__init__
(
self
,
match_results
,
use_matmul_gather
=
False
):
"""Constructs a Match object.
Args:
...
...
@@ -52,6 +54,8 @@ class Match(object):
meaning that column i is matched with row match_results[i].
(2) match_results[i]=-1, meaning that column i is not matched.
(3) match_results[i]=-2, meaning that column i is ignored.
use_matmul_gather: Use matrix multiplication based gather instead of
standard tf.gather. (Default: False).
Raises:
ValueError: if match_results does not have rank 1 or is not an
...
...
@@ -63,6 +67,9 @@ class Match(object):
raise
ValueError
(
'match_results should be an int32 or int64 scalar '
'tensor'
)
self
.
_match_results
=
match_results
self
.
_gather_op
=
tf
.
gather
if
use_matmul_gather
:
self
.
_gather_op
=
ops
.
matmul_gather_on_zeroth_axis
@
property
def
match_results
(
self
):
...
...
@@ -163,17 +170,55 @@ class Match(object):
row_indices: int32 tensor of shape [K] with row indices.
"""
return
self
.
_reshape_and_cast
(
t
f
.
gather
(
self
.
_match_results
,
self
.
matched_column_indices
()))
sel
f
.
_
gather
_op
(
self
.
_match_results
,
self
.
matched_column_indices
()))
def
_reshape_and_cast
(
self
,
t
):
return
tf
.
cast
(
tf
.
reshape
(
t
,
[
-
1
]),
tf
.
int32
)
def
gather_based_on_match
(
self
,
input_tensor
,
unmatched_value
,
ignored_value
):
"""Gathers elements from `input_tensor` based on match results.
For columns that are matched to a row, gathered_tensor[col] is set to
input_tensor[match_results[col]]. For columns that are unmatched,
gathered_tensor[col] is set to unmatched_value. Finally, for columns that
are ignored gathered_tensor[col] is set to ignored_value.
Note that the input_tensor.shape[1:] must match with unmatched_value.shape
and ignored_value.shape
Args:
input_tensor: Tensor to gather values from.
unmatched_value: Constant tensor value for unmatched columns.
ignored_value: Constant tensor value for ignored columns.
Returns:
gathered_tensor: A tensor containing values gathered from input_tensor.
The shape of the gathered tensor is [match_results.shape[0]] +
input_tensor.shape[1:].
"""
input_tensor
=
tf
.
concat
([
tf
.
stack
([
ignored_value
,
unmatched_value
]),
input_tensor
],
axis
=
0
)
gather_indices
=
tf
.
maximum
(
self
.
match_results
+
2
,
0
)
gathered_tensor
=
self
.
_gather_op
(
input_tensor
,
gather_indices
)
return
gathered_tensor
class
Matcher
(
object
):
"""Abstract base class for matcher.
"""
__metaclass__
=
ABCMeta
def
__init__
(
self
,
use_matmul_gather
=
False
):
"""Constructs a Matcher.
Args:
use_matmul_gather: Force constructed match objects to use matrix
multiplication based gather instead of standard tf.gather.
(Default: False).
"""
self
.
_use_matmul_gather
=
use_matmul_gather
def
match
(
self
,
similarity_matrix
,
scope
=
None
,
**
params
):
"""Computes matches among row and column indices and returns the result.
...
...
@@ -191,11 +236,12 @@ class Matcher(object):
A Match object with the results of matching.
"""
with
tf
.
name_scope
(
scope
,
'Match'
,
[
similarity_matrix
,
params
])
as
scope
:
return
Match
(
self
.
_match
(
similarity_matrix
,
**
params
))
return
Match
(
self
.
_match
(
similarity_matrix
,
**
params
),
self
.
_use_matmul_gather
)
@
abstractmethod
def
_match
(
self
,
similarity_matrix
,
**
params
):
"""Method to be overriden by implementations.
"""Method to be overrid
d
en by implementations.
Args:
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
...
...
Prev
1
2
3
4
5
6
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment