ModelZoo / ResNet50_tensorflow / Commits

Commit 27b4acd4
Authored Sep 25, 2018 by Aman Gupta

Merge remote-tracking branch 'upstream/master'

Parents: 5133522f, d4e1f97f
Changes: 240
Showing 20 changed files with 2420 additions and 798 deletions (+2420, -798)
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py  +260 -196
research/object_detection/meta_architectures/rfcn_meta_arch.py  +27 -17
research/object_detection/meta_architectures/ssd_meta_arch.py  +45 -51
research/object_detection/meta_architectures/ssd_meta_arch_test.py  +87 -185
research/object_detection/meta_architectures/ssd_meta_arch_test_lib.py  +224 -0
research/object_detection/metrics/coco_evaluation.py  +33 -6
research/object_detection/metrics/coco_evaluation_test.py  +80 -100
research/object_detection/metrics/offline_eval_map_corloc.py  +2 -4
research/object_detection/model_lib.py  +136 -99
research/object_detection/model_lib_test.py  +19 -31
research/object_detection/model_main.py  +20 -12
research/object_detection/model_tpu_main.py  +15 -8
research/object_detection/models/feature_map_generators.py  +249 -12
research/object_detection/models/feature_map_generators_test.py  +267 -22
research/object_detection/models/keras_applications/__init__.py  +0 -0
research/object_detection/models/keras_applications/mobilenet_v2.py  +307 -0
research/object_detection/models/keras_applications/mobilenet_v2_test.py  +467 -0
research/object_detection/models/ssd_feature_extractor_test.py  +77 -24
research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor.py  +24 -3
research/object_detection/models/ssd_mobilenet_v2_feature_extractor_test.py  +81 -28
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py  (view file @ 27b4acd4)
@@ -14,8 +14,12 @@
 # ==============================================================================

 """Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
+import functools
+from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf

 from google.protobuf import text_format
 from object_detection.anchor_generators import grid_anchor_generator
 from object_detection.builders import box_predictor_builder
@@ -23,11 +27,14 @@ from object_detection.builders import hyperparams_builder
 from object_detection.builders import post_processing_builder
 from object_detection.core import balanced_positive_negative_sampler as sampler
 from object_detection.core import losses
+from object_detection.core import post_processing
 from object_detection.core import target_assigner
 from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
+from object_detection.utils import ops
+from object_detection.utils import test_case
 from object_detection.utils import test_utils

 slim = tf.contrib.slim
@@ -60,7 +67,7 @@ class FakeFasterRCNNFeatureExtractor(
                          num_outputs=3, kernel_size=1, scope='layer2')


-class FasterRCNNMetaArchTestBase(tf.test.TestCase):
+class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
   """Base class to test Faster R-CNN and R-FCN meta architectures."""

   def _build_arg_scope_with_hyperparams(self,
@@ -157,7 +164,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
                    masks_are_class_agnostic=False,
                    use_matmul_crop_and_resize=False,
                    clip_anchors_to_image=False,
-                   use_matmul_gather_in_matcher=False):
+                   use_matmul_gather_in_matcher=False,
+                   use_static_shapes=False):

     def image_resizer_fn(image, masks=None):
       """Fake image resizer function."""
@@ -220,11 +228,18 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
     first_stage_box_predictor_depth = 512
     first_stage_minibatch_size = 3
     first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
-        positive_fraction=0.5, is_static=False)
+        positive_fraction=0.5, is_static=use_static_shapes)

     first_stage_nms_score_threshold = -1.0
     first_stage_nms_iou_threshold = 1.0
     first_stage_max_proposals = first_stage_max_proposals
+    first_stage_non_max_suppression_fn = functools.partial(
+        post_processing.batch_multiclass_non_max_suppression,
+        score_thresh=first_stage_nms_score_threshold,
+        iou_thresh=first_stage_nms_iou_threshold,
+        max_size_per_class=first_stage_max_proposals,
+        max_total_size=first_stage_max_proposals,
+        use_static_shapes=use_static_shapes)

     first_stage_localization_loss_weight = 1.0
     first_stage_objectness_loss_weight = 1.0
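The change above folds all first-stage NMS configuration into a single callable instead of passing raw score/IOU thresholds around. A minimal sketch of the pattern, using a stand-in function with the same keyword surface as post_processing.batch_multiclass_non_max_suppression in this diff (the threshold values are the test's, not recommendations):

import functools

def batch_multiclass_nms(boxes, scores, clip_window=None, score_thresh=0.0,
                         iou_thresh=1.0, max_size_per_class=8,
                         max_total_size=8, use_static_shapes=False):
  # Stand-in: a real implementation would filter by score_thresh, suppress
  # overlaps above iou_thresh, and clip boxes to clip_window.
  del clip_window
  return boxes, scores

# All NMS configuration is bound once; the meta-architecture later calls
# first_stage_nms_fn(boxes, scores, clip_window=...) with tensors only.
first_stage_nms_fn = functools.partial(
    batch_multiclass_nms,
    score_thresh=-1.0,
    iou_thresh=1.0,
    max_size_per_class=8,
    max_total_size=8,
    use_static_shapes=False)

print(first_stage_nms_fn([[0, 0, 1, 1]], [0.9]))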
@@ -246,7 +261,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
     second_stage_non_max_suppression_fn, _ = post_processing_builder.build(
         post_processing_config)
     second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
-        positive_fraction=1.0, is_static=False)
+        positive_fraction=1.0, is_static=use_static_shapes)
     second_stage_score_conversion_fn = tf.identity
     second_stage_localization_loss_weight = 1.0
@@ -268,6 +283,9 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
         loc_loss_weight=second_stage_localization_loss_weight,
         max_negatives_per_positive=None)

+    crop_and_resize_fn = (
+        ops.matmul_crop_and_resize
+        if use_matmul_crop_and_resize else ops.native_crop_and_resize)
     common_kwargs = {
         'is_training': is_training,
         'num_classes': num_classes,
@@ -284,8 +302,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
         'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
         'first_stage_minibatch_size': first_stage_minibatch_size,
         'first_stage_sampler': first_stage_sampler,
-        'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
-        'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
+        'first_stage_non_max_suppression_fn':
+            first_stage_non_max_suppression_fn,
         'first_stage_max_proposals': first_stage_max_proposals,
         'first_stage_localization_loss_weight':
             first_stage_localization_loss_weight,
@@ -304,8 +322,10 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
         'second_stage_classification_loss':
             second_stage_classification_loss,
         'hard_example_miner': hard_example_miner,
-        'use_matmul_crop_and_resize': use_matmul_crop_and_resize,
-        'clip_anchors_to_image': clip_anchors_to_image
+        'crop_and_resize_fn': crop_and_resize_fn,
+        'clip_anchors_to_image': clip_anchors_to_image,
+        'use_static_shapes': use_static_shapes,
+        'resize_masks': True,
     }

     return self._get_model(
@@ -412,7 +432,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
       anchors = prediction_out['anchors']
       self.assertTrue(len(anchors.shape) == 2 and anchors.shape[1] == 4)
       num_anchors_out = anchors.shape[0]
-      self.assertTrue(num_anchors_out < num_anchors_strict_upper_bound)
+      self.assertLess(num_anchors_out, num_anchors_strict_upper_bound)
       self.assertTrue(np.all(np.greater_equal(anchors, 0)))
       self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
@@ -484,94 +504,97 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
         for key in expected_shapes:
           self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])

-  def _test_predict_gives_correct_shapes_in_train_mode_both_stages(
-      self,
-      use_matmul_crop_and_resize=False,
-      clip_anchors_to_image=False):
-    test_graph = tf.Graph()
-    with test_graph.as_default():
-      model = self._build_model(
-          is_training=True,
-          number_of_stages=2,
-          second_stage_batch_size=7,
-          predict_masks=False,
-          use_matmul_crop_and_resize=use_matmul_crop_and_resize,
-          clip_anchors_to_image=clip_anchors_to_image)
-
-      batch_size = 2
-      image_size = 10
-      max_num_proposals = 7
-      initial_crop_size = 3
-      maxpool_stride = 1
-
-      image_shape = (batch_size, image_size, image_size, 3)
-      preprocessed_inputs = tf.zeros(image_shape, dtype=tf.float32)
-      groundtruth_boxes_list = [
-          tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
-          tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
-      groundtruth_classes_list = [
-          tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
-          tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
-      groundtruth_weights_list = [
-          tf.constant([1, 1], dtype=tf.float32),
-          tf.constant([1, 1], dtype=tf.float32)]
-      _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
-      model.provide_groundtruth(
-          groundtruth_boxes_list,
-          groundtruth_classes_list,
-          groundtruth_weights_list=groundtruth_weights_list)
-
-      result_tensor_dict = model.predict(preprocessed_inputs, true_image_shapes)
-      expected_shapes = {
-          'rpn_box_predictor_features': (2, image_size, image_size, 512),
-          'rpn_features_to_crop': (2, image_size, image_size, 3),
-          'image_shape': (4,),
-          'refined_box_encodings': (2 * max_num_proposals, 2, 4),
-          'class_predictions_with_background': (2 * max_num_proposals, 2 + 1),
-          'num_proposals': (2,),
-          'proposal_boxes': (2, max_num_proposals, 4),
-          'proposal_boxes_normalized': (2, max_num_proposals, 4),
-          'box_classifier_features':
-              self._get_box_classifier_features_shape(image_size,
-                                                      batch_size,
-                                                      max_num_proposals,
-                                                      initial_crop_size,
-                                                      maxpool_stride,
-                                                      3)
-      }
-
-      init_op = tf.global_variables_initializer()
-      with self.test_session(graph=test_graph) as sess:
-        sess.run(init_op)
-        tensor_dict_out = sess.run(result_tensor_dict)
-        self.assertEqual(set(tensor_dict_out.keys()),
-                         set(expected_shapes.keys()).union(set([
-                             'rpn_box_encodings',
-                             'rpn_objectness_predictions_with_background',
-                             'anchors'])))
-        for key in expected_shapes:
-          self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
-        anchors_shape_out = tensor_dict_out['anchors'].shape
-        self.assertEqual(2, len(anchors_shape_out))
-        self.assertEqual(4, anchors_shape_out[1])
-        num_anchors_out = anchors_shape_out[0]
-        self.assertAllEqual(tensor_dict_out['rpn_box_encodings'].shape,
-                            (2, num_anchors_out, 4))
-        self.assertAllEqual(
-            tensor_dict_out['rpn_objectness_predictions_with_background'].shape,
-            (2, num_anchors_out, 2))
-
-  def test_predict_gives_correct_shapes_in_train_mode_both_stages(self):
-    self._test_predict_gives_correct_shapes_in_train_mode_both_stages()
-
-  def test_predict_gives_correct_shapes_in_train_mode_matmul_crop_resize(self):
-    self._test_predict_gives_correct_shapes_in_train_mode_both_stages(
-        use_matmul_crop_and_resize=True)
-
-  def test_predict_gives_correct_shapes_in_train_mode_clip_anchors(self):
-    self._test_predict_gives_correct_shapes_in_train_mode_both_stages(
-        clip_anchors_to_image=True)
+  def test_predict_gives_correct_shapes_in_train_mode_both_stages(
+      self,
+      use_static_shapes=False):
+    batch_size = 2
+    image_size = 10
+    max_num_proposals = 7
+    initial_crop_size = 3
+    maxpool_stride = 1
+
+    def graph_fn(images, gt_boxes, gt_classes, gt_weights):
+      """Function to construct tf graph for the test."""
+      model = self._build_model(
+          is_training=True,
+          number_of_stages=2,
+          second_stage_batch_size=7,
+          predict_masks=False,
+          use_matmul_crop_and_resize=use_static_shapes,
+          clip_anchors_to_image=use_static_shapes,
+          use_static_shapes=use_static_shapes)
+      preprocessed_inputs, true_image_shapes = model.preprocess(images)
+      model.provide_groundtruth(
+          groundtruth_boxes_list=tf.unstack(gt_boxes),
+          groundtruth_classes_list=tf.unstack(gt_classes),
+          groundtruth_weights_list=tf.unstack(gt_weights))
+      result_tensor_dict = model.predict(preprocessed_inputs, true_image_shapes)
+      return (result_tensor_dict['refined_box_encodings'],
+              result_tensor_dict['class_predictions_with_background'],
+              result_tensor_dict['proposal_boxes'],
+              result_tensor_dict['proposal_boxes_normalized'],
+              result_tensor_dict['anchors'],
+              result_tensor_dict['rpn_box_encodings'],
+              result_tensor_dict['rpn_objectness_predictions_with_background'],
+              result_tensor_dict['rpn_features_to_crop'],
+              result_tensor_dict['rpn_box_predictor_features'])
+
+    image_shape = (batch_size, image_size, image_size, 3)
+    images = np.zeros(image_shape, dtype=np.float32)
+    gt_boxes = np.stack([
+        np.array([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=np.float32),
+        np.array([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=np.float32)
+    ])
+    gt_classes = np.stack([
+        np.array([[1, 0], [0, 1]], dtype=np.float32),
+        np.array([[1, 0], [1, 0]], dtype=np.float32)
+    ])
+    gt_weights = np.stack([
+        np.array([1, 1], dtype=np.float32),
+        np.array([1, 1], dtype=np.float32)
+    ])
+    if use_static_shapes:
+      results = self.execute(graph_fn,
+                             [images, gt_boxes, gt_classes, gt_weights])
+    else:
+      results = self.execute_cpu(graph_fn,
+                                 [images, gt_boxes, gt_classes, gt_weights])
+
+    expected_shapes = {
+        'rpn_box_predictor_features': (2, image_size, image_size, 512),
+        'rpn_features_to_crop': (2, image_size, image_size, 3),
+        'refined_box_encodings': (2 * max_num_proposals, 2, 4),
+        'class_predictions_with_background': (2 * max_num_proposals, 2 + 1),
+        'proposal_boxes': (2, max_num_proposals, 4),
+        'rpn_box_encodings': (2, image_size * image_size * 9, 4),
+        'proposal_boxes_normalized': (2, max_num_proposals, 4),
+        'box_classifier_features':
+            self._get_box_classifier_features_shape(
+                image_size, batch_size, max_num_proposals, initial_crop_size,
+                maxpool_stride, 3),
+        'rpn_objectness_predictions_with_background':
+            (2, image_size * image_size * 9, 2)
+    }
+    # TODO(rathodv): Possibly change utils/test_case.py to accept dictionaries
+    # and return dictionaries so we don't have to rely on the order of tensors.
+    self.assertAllEqual(results[0].shape,
+                        expected_shapes['refined_box_encodings'])
+    self.assertAllEqual(results[1].shape,
+                        expected_shapes['class_predictions_with_background'])
+    self.assertAllEqual(results[2].shape, expected_shapes['proposal_boxes'])
+    self.assertAllEqual(results[3].shape,
+                        expected_shapes['proposal_boxes_normalized'])
+    anchors_shape = results[4].shape
+    self.assertAllEqual(results[5].shape,
+                        [batch_size, anchors_shape[0], 4])
+    self.assertAllEqual(results[6].shape,
+                        [batch_size, anchors_shape[0], 2])
+    self.assertAllEqual(results[7].shape,
+                        expected_shapes['rpn_features_to_crop'])
+    self.assertAllEqual(results[8].shape,
+                        expected_shapes['rpn_box_predictor_features'])

   def _test_postprocess_first_stage_only_inference_mode(
       self, pad_to_max_dimension=None):
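The rewritten test no longer manages its own graph and session; it hands a graph-building function plus numpy inputs to test_case.TestCase.execute (TPU/static shapes) or execute_cpu. A rough, self-contained sketch of what such a helper does; this is an illustration of the pattern, not the actual utils/test_case.py implementation:

import numpy as np
import tensorflow as tf

def execute_cpu(graph_fn, inputs):
  """Illustrative stand-in: builds graph_fn over placeholders, runs it once."""
  with tf.Graph().as_default() as graph:
    placeholders = [
        tf.placeholder(tf.as_dtype(arr.dtype), shape=arr.shape)
        for arr in inputs
    ]
    outputs = graph_fn(*placeholders)
    with tf.Session(graph=graph) as sess:
      sess.run(tf.global_variables_initializer())
      return sess.run(outputs, feed_dict=dict(zip(placeholders, inputs)))

def graph_fn(images):
  return tf.reduce_mean(images)  # stand-in for model predict/loss outputs

print(execute_cpu(graph_fn, [np.ones((2, 10, 10, 3), np.float32)]))  # 1.0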
@@ -848,10 +871,10 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
       loss_dict_out = sess.run(loss_dict)
       self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0)
       self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0)
-      self.assertTrue('Loss/BoxClassifierLoss/localization_loss'
-                      not in loss_dict_out)
-      self.assertTrue('Loss/BoxClassifierLoss/classification_loss'
-                      not in loss_dict_out)
+      self.assertNotIn('Loss/BoxClassifierLoss/localization_loss',
+                       loss_dict_out)
+      self.assertNotIn('Loss/BoxClassifierLoss/classification_loss',
+                       loss_dict_out)

   # TODO(rathodv): Split test into two - with and without masks.
   def test_loss_full(self):
@@ -1157,22 +1180,58 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
           'Loss/BoxClassifierLoss/classification_loss'], 0)
       self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0)

-  def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(self):
-    model = self._build_model(
-        is_training=True, number_of_stages=2, second_stage_batch_size=6)
+  def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(
+      self, use_static_shapes=False, shared_boxes=False):
     batch_size = 2
-    anchors = tf.constant(
+    first_stage_max_proposals = 8
+    second_stage_batch_size = 6
+    num_classes = 2
+
+    def graph_fn(anchors, rpn_box_encodings,
+                 rpn_objectness_predictions_with_background, images,
+                 num_proposals, proposal_boxes, refined_box_encodings,
+                 class_predictions_with_background, groundtruth_boxes,
+                 groundtruth_classes):
+      """Function to construct tf graph for the test."""
+      model = self._build_model(
+          is_training=True, number_of_stages=2,
+          second_stage_batch_size=second_stage_batch_size,
+          first_stage_max_proposals=first_stage_max_proposals,
+          num_classes=num_classes,
+          use_matmul_crop_and_resize=use_static_shapes,
+          clip_anchors_to_image=use_static_shapes,
+          use_static_shapes=use_static_shapes)
+      prediction_dict = {
+          'rpn_box_encodings': rpn_box_encodings,
+          'rpn_objectness_predictions_with_background':
+          rpn_objectness_predictions_with_background,
+          'image_shape': tf.shape(images),
+          'anchors': anchors,
+          'refined_box_encodings': refined_box_encodings,
+          'class_predictions_with_background':
+          class_predictions_with_background,
+          'proposal_boxes': proposal_boxes,
+          'num_proposals': num_proposals
+      }
+      _, true_image_shapes = model.preprocess(images)
+      model.provide_groundtruth(tf.unstack(groundtruth_boxes),
+                                tf.unstack(groundtruth_classes))
+      loss_dict = model.loss(prediction_dict, true_image_shapes)
+      return (loss_dict['Loss/RPNLoss/localization_loss'],
+              loss_dict['Loss/RPNLoss/objectness_loss'],
+              loss_dict['Loss/BoxClassifierLoss/localization_loss'],
+              loss_dict['Loss/BoxClassifierLoss/classification_loss'])
+
+    anchors = np.array(
         [[0, 0, 16, 16],
          [0, 16, 16, 32],
          [16, 0, 32, 16],
-         [16, 16, 32, 32]], dtype=tf.float32)
-    rpn_box_encodings = tf.zeros(
-        [batch_size,
-         anchors.get_shape().as_list()[0],
-         BOX_CODE_SIZE], dtype=tf.float32)
+         [16, 16, 32, 32]], dtype=np.float32)
+    rpn_box_encodings = np.zeros(
+        [batch_size, anchors.shape[1], BOX_CODE_SIZE], dtype=np.float32)
     # use different numbers for the objectness category to break ties in
     # order of boxes returned by NMS
-    rpn_objectness_predictions_with_background = tf.constant(
+    rpn_objectness_predictions_with_background = np.array(
         [[[-10, 13],
           [10, -10],
           [10, -11],

@@ -1180,13 +1239,13 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
          [[-10, 13],
           [10, -10],
           [10, -11],
-          [10, -12]]], dtype=tf.float32)
-    image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
+          [10, -12]]], dtype=np.float32)
+    images = np.zeros([batch_size, 32, 32, 3], dtype=np.float32)
     # box_classifier_batch_size is 6, but here we assume that the number of
     # actual proposals (not counting zero paddings) is fewer.
-    num_proposals = tf.constant([3, 2], dtype=tf.int32)
-    proposal_boxes = tf.constant(
+    num_proposals = np.array([3, 2], dtype=np.int32)
+    proposal_boxes = np.array(
         [[[0, 0, 16, 16],
           [0, 16, 16, 32],
           [16, 0, 32, 16],

@@ -1198,13 +1257,13 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
           [0, 0, 0, 0],  # begin paddings
           [0, 0, 0, 0],
           [0, 0, 0, 0],
-          [0, 0, 0, 0]]], dtype=tf.float32)
-    refined_box_encodings = tf.zeros(
-        (batch_size * model.max_num_proposals,
-         model.num_classes,
-         BOX_CODE_SIZE), dtype=tf.float32)
-    class_predictions_with_background = tf.constant(
+          [0, 0, 0, 0]]], dtype=np.float32)
+    refined_box_encodings = np.zeros(
+        (batch_size * second_stage_batch_size,
+         1 if shared_boxes else num_classes,
+         BOX_CODE_SIZE), dtype=np.float32)
+    class_predictions_with_background = np.array(
         [[-10, 10, -10],  # first image
          [10, -10, -10],
          [10, -10, -10],

@@ -1216,7 +1275,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
          [0, 0, 0],  # begin paddings
          [0, 0, 0],
          [0, 0, 0],
-         [0, 0, 0],], dtype=tf.float32)
+         [0, 0, 0],], dtype=np.float32)

     # The first groundtruth box is 4/5 of the anchor size in both directions
     # experiencing a loss of:

@@ -1225,38 +1284,29 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
     # The second groundtruth box is identical to the prediction and thus
     # experiences zero loss.
     # Total average loss is (abs(5 * log(1/2)) - .5) / 3.
-    groundtruth_boxes_list = [
-        tf.constant([[0.05, 0.05, 0.45, 0.45]], dtype=tf.float32),
-        tf.constant([[0.0, 0.0, 0.5, 0.5]], dtype=tf.float32)]
-    groundtruth_classes_list = [
-        tf.constant([[1, 0]], dtype=tf.float32),
-        tf.constant([[0, 1]], dtype=tf.float32)]
-    exp_loc_loss = (-5 * np.log(.8) - 0.5) / 3.0
-
-    prediction_dict = {
-        'rpn_box_encodings': rpn_box_encodings,
-        'rpn_objectness_predictions_with_background':
-        rpn_objectness_predictions_with_background,
-        'image_shape': image_shape,
-        'anchors': anchors,
-        'refined_box_encodings': refined_box_encodings,
-        'class_predictions_with_background': class_predictions_with_background,
-        'proposal_boxes': proposal_boxes,
-        'num_proposals': num_proposals
-    }
-    _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
-    model.provide_groundtruth(groundtruth_boxes_list,
-                              groundtruth_classes_list)
-    loss_dict = model.loss(prediction_dict, true_image_shapes)
-
-    with self.test_session() as sess:
-      loss_dict_out = sess.run(loss_dict)
-      self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'],
-                          exp_loc_loss)
-      self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0)
-      self.assertAllClose(loss_dict_out[
-          'Loss/BoxClassifierLoss/localization_loss'], exp_loc_loss)
-      self.assertAllClose(loss_dict_out[
-          'Loss/BoxClassifierLoss/classification_loss'], 0)
+    groundtruth_boxes = np.stack([
+        np.array([[0.05, 0.05, 0.45, 0.45]], dtype=np.float32),
+        np.array([[0.0, 0.0, 0.5, 0.5]], dtype=np.float32)])
+    groundtruth_classes = np.stack([np.array([[1, 0]], dtype=np.float32),
+                                    np.array([[0, 1]], dtype=np.float32)])
+    exp_loc_loss = (-5 * np.log(.8) - 0.5) / 3.0
+
+    execute_fn = self.execute_cpu
+    if use_static_shapes:
+      execute_fn = self.execute
+
+    results = execute_fn(graph_fn, [
+        anchors, rpn_box_encodings,
+        rpn_objectness_predictions_with_background, images, num_proposals,
+        proposal_boxes, refined_box_encodings,
+        class_predictions_with_background, groundtruth_boxes,
+        groundtruth_classes
+    ])
+
+    self.assertAllClose(results[0], exp_loc_loss, rtol=1e-4, atol=1e-4)
+    self.assertAllClose(results[1], 0.0)
+    self.assertAllClose(results[2], exp_loc_loss, rtol=1e-4, atol=1e-4)
+    self.assertAllClose(results[3], 0.0)

   def test_loss_with_hard_mining(self):
     model = self._build_model(is_training=True,
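For reference, the expected localization loss constant used in the rewritten test can be checked numerically. This assumes a smooth-L1 loss taking the |x| - 0.5 branch for |x| > 1 and a box-coder scale of 5, which matches the exp_loc_loss expression in the diff:

import numpy as np

x = 5 * np.log(4.0 / 5.0)   # encoded size error: groundtruth is 4/5 the anchor
loss = abs(x) - 0.5         # smooth-L1, |x| > 1 branch
print(loss / 3.0)           # ~0.2052, equal to (-5 * np.log(.8) - 0.5) / 3.0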
@@ -1346,10 +1396,14 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
       self.assertAllClose(loss_dict_out[
           'Loss/BoxClassifierLoss/classification_loss'], 0)

-  def test_loss_full_with_shared_boxes(self):
+  def test_loss_with_hard_mining_and_losses_mask(self):
     model = self._build_model(is_training=True,
                               number_of_stages=2,
-                              second_stage_batch_size=6)
+                              second_stage_batch_size=None,
+                              first_stage_max_proposals=6,
+                              hard_mining=True)
     batch_size = 2
+    number_of_proposals = 3
     anchors = tf.constant(
         [[0, 0, 16, 16],
          [0, 16, 16, 32],

@@ -1361,63 +1415,77 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
          BOX_CODE_SIZE], dtype=tf.float32)
     # use different numbers for the objectness category to break ties in
     # order of boxes returned by NMS
-    rpn_objectness_predictions_with_background = tf.constant([
-        [[-10, 13],
-         [10, -10],
-         [10, -11],
-         [-10, 12]],
-        [[10, -10],
-         [-10, 13],
-         [-10, 12],
-         [10, -11]]], dtype=tf.float32)
+    rpn_objectness_predictions_with_background = tf.constant(
+        [[[-10, 13],
+          [-10, 12],
+          [10, -11],
+          [10, -12]],
+         [[-10, 13],
+          [-10, 12],
+          [10, -11],
+          [10, -12]]], dtype=tf.float32)
     image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
-    num_proposals = tf.constant([6, 6], dtype=tf.int32)
+
+    # box_classifier_batch_size is 6, but here we assume that the number of
+    # actual proposals (not counting zero paddings) is fewer (3).
+    num_proposals = tf.constant([number_of_proposals, number_of_proposals],
+                                dtype=tf.int32)
     proposal_boxes = tf.constant(
-        2*[[[0, 0, 16, 16],
-            [0, 16, 16, 32],
-            [16, 0, 32, 16],
-            [16, 16, 32, 32],
-            [0, 0, 16, 16],
-            [0, 16, 16, 32]]], dtype=tf.float32)
+        [[[0, 0, 16, 16],  # first image
+          [0, 16, 16, 32],
+          [16, 0, 32, 16],
+          [0, 0, 0, 0],  # begin paddings
+          [0, 0, 0, 0],
+          [0, 0, 0, 0]],
+         [[0, 0, 16, 16],  # second image
+          [0, 16, 16, 32],
+          [16, 0, 32, 16],
+          [0, 0, 0, 0],  # begin paddings
+          [0, 0, 0, 0],
+          [0, 0, 0, 0]]], dtype=tf.float32)
     refined_box_encodings = tf.zeros(
         (batch_size * model.max_num_proposals,
-         1,  # one box shared among all the classes
+         model.num_classes,
          BOX_CODE_SIZE), dtype=tf.float32)
     class_predictions_with_background = tf.constant(
-        [[-10, 10, -10],  # first image
-         [10, -10, -10],
-         [-10, -10, 10],
-         [-10, 10, -10],
-         [10, -10, -10],
-         [10, -10, -10],
-         [-10, 10, -10],  # second image
-         [-10, -10, 10],
-         [10, -10, -10],
-         [10, -10, -10],
-         [-10, 10, -10],
-         [-10, 10, -10]], dtype=tf.float32)
-    mask_predictions_logits = 20 * tf.ones((batch_size *
-                                            model.max_num_proposals,
-                                            model.num_classes,
-                                            14, 14),
-                                           dtype=tf.float32)
+        [[-10, 10, -10],  # first image
+         [10, -10, -10],
+         [-10, -10, 10],
+         [0, 0, 0],  # begin paddings
+         [0, 0, 0],
+         [0, 0, 0],
+         [-10, 10, -10],  # second image
+         [-10, -10, 10],
+         [10, -10, -10],
+         [0, 0, 0],  # begin paddings
+         [0, 0, 0],
+         [0, 0, 0]], dtype=tf.float32)

+    # The first groundtruth box is 4/5 of the anchor size in both directions
+    # experiencing a loss of:
+    # 2 * SmoothL1(5 * log(4/5)) / (num_proposals * batch_size)
+    #   = 2 * (abs(5 * log(1/2)) - .5) / 3
+    # The second groundtruth box is 46/50 of the anchor size in both directions
+    # experiencing a loss of:
+    # 2 * SmoothL1(5 * log(42/50)) / (num_proposals * batch_size)
+    #   = 2 * (.5(5 * log(.92))^2 - .5) / 3.
+    # Since the first groundtruth box experiences greater loss, and we have
+    # set num_hard_examples=1 in the HardMiner, the final localization loss
+    # corresponds to that of the first groundtruth box.
     groundtruth_boxes_list = [
-        tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
-        tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
-    groundtruth_classes_list = [
-        tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
-        tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
-    # Set all elements of groundtruth mask to 1.0. In this case all proposal
-    # crops of the groundtruth masks should return a mask that covers the entire
-    # proposal. Thus, if mask_predictions_logits element values are all greater
-    # than 20, the loss should be zero.
-    groundtruth_masks_list = [tf.convert_to_tensor(np.ones((2, 32, 32)),
-                                                   dtype=tf.float32),
-                              tf.convert_to_tensor(np.ones((2, 32, 32)),
-                                                   dtype=tf.float32)]
+        tf.constant([[0.05, 0.05, 0.45, 0.45],
+                     [0.02, 0.52, 0.48, 0.98]], dtype=tf.float32),
+        tf.constant([[0.05, 0.05, 0.45, 0.45],
+                     [0.02, 0.52, 0.48, 0.98]], dtype=tf.float32)]
+    groundtruth_classes_list = [
+        tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
+        tf.constant([[1, 0], [0, 1]], dtype=tf.float32)]
+    is_annotated_list = [tf.constant(True, dtype=tf.bool),
+                         tf.constant(False, dtype=tf.bool)]
+    exp_loc_loss = (2 * (-5 * np.log(.8) - 0.5) /
+                    (number_of_proposals * batch_size))

     prediction_dict = {
         'rpn_box_encodings': rpn_box_encodings,
         'rpn_objectness_predictions_with_background':

@@ -1427,24 +1495,20 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
         'refined_box_encodings': refined_box_encodings,
         'class_predictions_with_background': class_predictions_with_background,
         'proposal_boxes': proposal_boxes,
-        'num_proposals': num_proposals,
-        'mask_predictions': mask_predictions_logits
+        'num_proposals': num_proposals
     }
     _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
     model.provide_groundtruth(groundtruth_boxes_list,
                               groundtruth_classes_list,
-                              groundtruth_masks_list)
+                              is_annotated_list=is_annotated_list)
     loss_dict = model.loss(prediction_dict, true_image_shapes)

     with self.test_session() as sess:
       loss_dict_out = sess.run(loss_dict)
       self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0)
       self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0)
       self.assertAllClose(loss_dict_out[
-          'Loss/BoxClassifierLoss/localization_loss'], 0)
+          'Loss/BoxClassifierLoss/localization_loss'], exp_loc_loss)
       self.assertAllClose(loss_dict_out[
           'Loss/BoxClassifierLoss/classification_loss'], 0)
-      self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0)

   def test_restore_map_for_classification_ckpt(self):
     # Define mock tensorflow classification graph and save variables.
research/object_detection/meta_architectures/rfcn_meta_arch.py  (view file @ 27b4acd4)
@@ -62,11 +62,11 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
                first_stage_box_predictor_depth,
                first_stage_minibatch_size,
                first_stage_sampler,
-               first_stage_nms_score_threshold,
-               first_stage_nms_iou_threshold,
+               first_stage_non_max_suppression_fn,
                first_stage_max_proposals,
                first_stage_localization_loss_weight,
                first_stage_objectness_loss_weight,
+               crop_and_resize_fn,
                second_stage_target_assigner,
                second_stage_rfcn_box_predictor,
                second_stage_batch_size,
@@ -79,8 +79,9 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
                hard_example_miner,
                parallel_iterations=16,
                add_summaries=True,
-               use_matmul_crop_and_resize=False,
-               clip_anchors_to_image=False):
+               clip_anchors_to_image=False,
+               use_static_shapes=False,
+               resize_masks=False):
     """RFCNMetaArch Constructor.

     Args:
@@ -123,18 +124,22 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
         only called "batch_size" due to terminology from the Faster R-CNN paper.
       first_stage_sampler: The sampler for the boxes used to calculate the RPN
         loss after the first stage.
-      first_stage_nms_score_threshold: Score threshold for non max suppression
-        for the Region Proposal Network (RPN). This value is expected to be in
-        [0, 1] as it is applied directly after a softmax transformation. The
-        recommended value for Faster R-CNN is 0.
-      first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold
-        for performing Non-Max Suppression (NMS) on the boxes predicted by the
-        Region Proposal Network (RPN).
+      first_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
+        callable that takes `boxes`, `scores` and optional `clip_window` (with
+        all other inputs already set) and returns a dictionary containing
+        tensors with keys: `detection_boxes`, `detection_scores`,
+        `detection_classes`, `num_detections`. This is used to perform non max
+        suppression on the boxes predicted by the Region Proposal Network
+        (RPN).
+        See `post_processing.batch_multiclass_non_max_suppression` for the type
+        and shape of these tensors.
       first_stage_max_proposals: Maximum number of boxes to retain after
         performing Non-Max Suppression (NMS) on the boxes predicted by the
         Region Proposal Network (RPN).
       first_stage_localization_loss_weight: A float
       first_stage_objectness_loss_weight: A float
+      crop_and_resize_fn: A differentiable resampler to use for cropping RPN
+        proposal features.
       second_stage_target_assigner: Target assigner to use for second stage of
         R-FCN. If the model is configured with multiple prediction heads, this
         target assigner is used to generate targets for all heads (with the
@@ -168,12 +173,13 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
         in parallel for calls to tf.map_fn.
       add_summaries: boolean (default: True) controlling whether summary ops
         should be added to tensorflow graph.
-      use_matmul_crop_and_resize: Force the use of matrix multiplication based
-        crop and resize instead of standard tf.image.crop_and_resize while
-        computing second stage input feature maps.
       clip_anchors_to_image: The anchors generated are clipped to the
         window size without filtering the nonoverlapping anchors. This generates
         a static number of anchors. This argument is unused.
+      use_static_shapes: If True, uses implementation of ops with static shape
+        guarantees.
+      resize_masks: Indicates whether the masks present in the groundtruth
+        should be resized in the model with `image_resizer_fn`.

     Raises:
       ValueError: If `second_stage_batch_size` > `first_stage_max_proposals`
@@ -196,11 +202,11 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
         first_stage_box_predictor_depth,
         first_stage_minibatch_size,
         first_stage_sampler,
-        first_stage_nms_score_threshold,
-        first_stage_nms_iou_threshold,
+        first_stage_non_max_suppression_fn,
         first_stage_max_proposals,
         first_stage_localization_loss_weight,
         first_stage_objectness_loss_weight,
+        crop_and_resize_fn,
         None,  # initial_crop_size is not used in R-FCN
         None,  # maxpool_kernel_size is not used in R-FCN
         None,  # maxpool_stride is not used in R-FCN
@@ -215,7 +221,11 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
         second_stage_classification_loss,
         1.0,  # second stage mask prediction loss weight isn't used in R-FCN.
         hard_example_miner,
-        parallel_iterations)
+        parallel_iterations,
+        add_summaries,
+        clip_anchors_to_image,
+        use_static_shapes,
+        resize_masks)
     self._rfcn_box_predictor = second_stage_rfcn_box_predictor
research/object_detection/meta_architectures/ssd_meta_arch.py  (view file @ 27b4acd4)
@@ -125,12 +125,13 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
                depth_multiplier,
                min_depth,
                pad_to_multiple,
-               conv_hyperparams_config,
+               conv_hyperparams,
                freeze_batchnorm,
                inplace_batchnorm_update,
                use_explicit_padding=False,
                use_depthwise=False,
-               override_base_feature_extractor_hyperparams=False):
+               override_base_feature_extractor_hyperparams=False,
+               name=None):
     """Constructor.

     Args:
@@ -139,9 +140,9 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
       min_depth: minimum feature extractor depth.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      conv_hyperparams_config: A hyperparams.proto object containing
-        convolution hyperparameters for the layers added on top of the
-        base feature extractor.
+      conv_hyperparams: `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
       freeze_batchnorm: Whether to freeze batch norm parameters during
         training or not. When training with a small batch size (e.g. 1), it is
         desirable to freeze batch norm update and use pretrained batch norm
desirable to freeze batch norm update and use pretrained batch norm
...
@@ -156,14 +157,16 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
...
@@ -156,14 +157,16 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
override_base_feature_extractor_hyperparams: Whether to override
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_config`.
`conv_hyperparams_config`.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
"""
super
(
SSDKerasFeatureExtractor
,
self
).
__init__
()
super
(
SSDKerasFeatureExtractor
,
self
).
__init__
(
name
=
name
)
self
.
_is_training
=
is_training
self
.
_is_training
=
is_training
self
.
_depth_multiplier
=
depth_multiplier
self
.
_depth_multiplier
=
depth_multiplier
self
.
_min_depth
=
min_depth
self
.
_min_depth
=
min_depth
self
.
_pad_to_multiple
=
pad_to_multiple
self
.
_pad_to_multiple
=
pad_to_multiple
self
.
_conv_hyperparams
_config
=
conv_hyperparams
_config
self
.
_conv_hyperparams
=
conv_hyperparams
self
.
_freeze_batchnorm
=
freeze_batchnorm
self
.
_freeze_batchnorm
=
freeze_batchnorm
self
.
_inplace_batchnorm_update
=
inplace_batchnorm_update
self
.
_inplace_batchnorm_update
=
inplace_batchnorm_update
self
.
_use_explicit_padding
=
use_explicit_padding
self
.
_use_explicit_padding
=
use_explicit_padding
...
@@ -225,10 +228,7 @@ class SSDMetaArch(model.DetectionModel):
                box_predictor,
                box_coder,
                feature_extractor,
-               matcher,
-               region_similarity_calculator,
                encode_background_as_zeros,
-               negative_class_weight,
                image_resizer_fn,
                non_max_suppression_fn,
                score_conversion_fn,
@@ -238,14 +238,14 @@ class SSDMetaArch(model.DetectionModel):
                localization_loss_weight,
                normalize_loss_by_num_matches,
                hard_example_miner,
+               target_assigner_instance,
                add_summaries=True,
                normalize_loc_loss_by_codesize=False,
                freeze_batchnorm=False,
                inplace_batchnorm_update=False,
                add_background_class=True,
                random_example_sampler=None,
-               expected_classification_loss_under_sampling=None,
-               target_assigner_instance=None):
+               expected_classification_loss_under_sampling=None):
     """SSDMetaArch Constructor.

     TODO(rathodv,jonathanhuang): group NMS parameters + score converter into
@@ -259,13 +259,9 @@ class SSDMetaArch(model.DetectionModel):
       box_predictor: a box_predictor.BoxPredictor object.
       box_coder: a box_coder.BoxCoder object.
       feature_extractor: a SSDFeatureExtractor object.
-      matcher: a matcher.Matcher object.
-      region_similarity_calculator: a
-        region_similarity_calculator.RegionSimilarityCalculator object.
       encode_background_as_zeros: boolean determining whether background
         targets are to be encoded as an all zeros vector or a one-hot
         vector (where background is the 0th class).
-      negative_class_weight: Weight for confidence loss of negative anchors.
       image_resizer_fn: a callable for image resizing. This callable always
         takes a rank-3 image tensor (corresponding to a single image) and
         returns a rank-3 image tensor, possibly with new spatial dimensions and
@@ -288,6 +284,7 @@ class SSDMetaArch(model.DetectionModel):
       localization_loss_weight: float
       normalize_loss_by_num_matches: boolean
       hard_example_miner: a losses.HardExampleMiner object (can be None)
+      target_assigner_instance: target_assigner.TargetAssigner instance to use.
       add_summaries: boolean (default: True) controlling whether summary ops
         should be added to tensorflow graph.
       normalize_loc_loss_by_codesize: whether to normalize localization loss
@@ -312,7 +309,6 @@ class SSDMetaArch(model.DetectionModel):
         the random sampled examples.
       expected_classification_loss_under_sampling: If not None, use
         to calculate classification loss by background/foreground weighting.
-      target_assigner_instance: target_assigner.TargetAssigner instance to use.
     """
     super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes)
     self._is_training = is_training
@@ -324,8 +320,6 @@ class SSDMetaArch(model.DetectionModel):
     self._box_coder = box_coder
     self._feature_extractor = feature_extractor
-    self._matcher = matcher
-    self._region_similarity_calculator = region_similarity_calculator
     self._add_background_class = add_background_class

     # Needed for fine-tuning from classification checkpoints whose
@@ -347,14 +341,7 @@ class SSDMetaArch(model.DetectionModel):
     self._unmatched_class_label = tf.constant((self.num_classes + 1) * [0],
                                               tf.float32)

-    if target_assigner_instance:
-      self._target_assigner = target_assigner_instance
-    else:
-      self._target_assigner = target_assigner.TargetAssigner(
-          self._region_similarity_calculator,
-          self._matcher,
-          self._box_coder,
-          negative_class_weight=negative_class_weight)
+    self._target_assigner = target_assigner_instance

     self._classification_loss = classification_loss
     self._localization_loss = localization_loss
@@ -523,28 +510,25 @@ class SSDMetaArch(model.DetectionModel):
             im_height=image_shape[1],
             im_width=image_shape[2]))
     if self._box_predictor.is_keras_model:
-      prediction_dict = self._box_predictor(feature_maps)
+      predictor_results_dict = self._box_predictor(feature_maps)
     else:
       with slim.arg_scope([slim.batch_norm],
                           is_training=(self._is_training and
                                        not self._freeze_batchnorm),
                           updates_collections=batchnorm_updates_collections):
-        prediction_dict = self._box_predictor.predict(
+        predictor_results_dict = self._box_predictor.predict(
             feature_maps, self._anchor_generator.num_anchors_per_location())
-    box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
-    if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
-      box_encodings = tf.squeeze(box_encodings, axis=2)
-    class_predictions_with_background = tf.concat(
-        prediction_dict['class_predictions_with_background'], axis=1)
     predictions_dict = {
         'preprocessed_inputs': preprocessed_inputs,
-        'box_encodings': box_encodings,
-        'class_predictions_with_background': class_predictions_with_background,
         'feature_maps': feature_maps,
         'anchors': self._anchors.get()
     }
+    for prediction_key, prediction_list in iter(predictor_results_dict.items()):
+      prediction = tf.concat(prediction_list, axis=1)
+      if (prediction_key == 'box_encodings' and prediction.shape.ndims == 4 and
+          prediction.shape[2] == 1):
+        prediction = tf.squeeze(prediction, axis=2)
+      predictions_dict[prediction_key] = prediction
     self._batched_prediction_tensor_names = [x for x in predictions_dict
                                              if x != 'anchors']
     return predictions_dict
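The predict() change above generalizes the old hard-coded concatenation of `box_encodings` and `class_predictions_with_background` to every key the box predictor returns, so new heads (for example mask predictions) flow through without further plumbing. A small self-contained illustration of that loop; the shapes are made up for the example (two feature maps with 10 and 6 anchors, a shared-box predictor where the third dimension is 1):

import tensorflow as tf

predictor_results_dict = {
    'box_encodings': [tf.zeros([2, 10, 1, 4]), tf.zeros([2, 6, 1, 4])],
    'class_predictions_with_background': [
        tf.zeros([2, 10, 3]), tf.zeros([2, 6, 3])],
}
predictions_dict = {}
for key, prediction_list in predictor_results_dict.items():
  prediction = tf.concat(prediction_list, axis=1)  # join over the anchor axis
  if (key == 'box_encodings' and prediction.shape.ndims == 4 and
      prediction.shape[2] == 1):
    prediction = tf.squeeze(prediction, axis=2)  # drop the shared-box dim
  predictions_dict[key] = prediction

print(predictions_dict['box_encodings'].shape)  # (2, 16, 4)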
@@ -587,6 +571,10 @@ class SSDMetaArch(model.DetectionModel):
         [batch_size, num_anchors, num_classes+1] containing class predictions
         (logits) for each of the anchors. Note that this tensor *includes*
         background class predictions.
+        4) mask_predictions: (optional) a 5-D float tensor of shape
+          [batch_size, num_anchors, q, mask_height, mask_width]. `q` can be
+          either number of classes or 1 depending on whether a separate mask is
+          predicted per class.
       true_image_shapes: int32 tensor of shape [batch, 3] where each row is
         of the form [height, width, channels] indicating the shapes
         of true images in the resized images, as resized images can be padded
@@ -599,6 +587,8 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -599,6 +587,8 @@ class SSDMetaArch(model.DetectionModel):
detection_classes: [batch, max_detections]
detection_classes: [batch, max_detections]
detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
encoded in the prediction_dict 'box_encodings')
encoded in the prediction_dict 'box_encodings')
detection_masks: [batch_size, max_detections, mask_height, mask_width]
(optional)
num_detections: [batch]
num_detections: [batch]
Raises:
Raises:
ValueError: if prediction_dict does not contain `box_encodings` or
ValueError: if prediction_dict does not contain `box_encodings` or
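
For concreteness, a tiny sketch of the optional mask tensor documented above, here with a class-agnostic head (q = 1) and made-up sizes:

import numpy as np

batch_size, num_anchors, num_classes = 2, 8, 3
mask_height = mask_width = 16
q = 1  # class-agnostic; would equal num_classes for per-class masks
mask_predictions = np.zeros(
    (batch_size, num_anchors, q, mask_height, mask_width), np.float32)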
...
@@ -627,13 +617,14 @@ class SSDMetaArch(model.DetectionModel):
       if detection_keypoints is not None:
         additional_fields = {
             fields.BoxListFields.keypoints: detection_keypoints}
-      (nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields,
-       num_detections) = self._non_max_suppression_fn(
+      (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+       nmsed_additional_fields, num_detections) = self._non_max_suppression_fn(
           detection_boxes,
           detection_scores,
           clip_window=self._compute_clip_window(
               preprocessed_images, true_image_shapes),
-          additional_fields=additional_fields)
+          additional_fields=additional_fields,
+          masks=prediction_dict.get('mask_predictions'))
       detection_dict = {
           fields.DetectionResultFields.detection_boxes: nmsed_boxes,
           fields.DetectionResultFields.detection_scores: nmsed_scores,
...
@@ -645,6 +636,9 @@ class SSDMetaArch(model.DetectionModel):
           fields.BoxListFields.keypoints in nmsed_additional_fields):
         detection_dict[fields.DetectionResultFields.detection_keypoints] = (
             nmsed_additional_fields[fields.BoxListFields.keypoints])
+      if nmsed_masks is not None:
+        detection_dict[
+            fields.DetectionResultFields.detection_masks] = nmsed_masks
       return detection_dict

   def loss(self, prediction_dict, true_image_shapes, scope=None):
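
The mask plumbing above is purely optional: postprocess() passes whatever `mask_predictions` the predictor produced into NMS, and copies surviving masks into the output only when they exist. A minimal pure-Python sketch of that calling convention (helper name is illustrative, not from the commit):

def build_detection_dict(nmsed_boxes, nmsed_scores, nmsed_classes,
                         nmsed_masks=None):
  # Masks only appear in the output when the predictor produced them.
  detection_dict = {
      'detection_boxes': nmsed_boxes,
      'detection_scores': nmsed_scores,
      'detection_classes': nmsed_classes,
  }
  if nmsed_masks is not None:
    detection_dict['detection_masks'] = nmsed_masks
  return detection_dict

print(sorted(build_detection_dict([], [], []).keys()))
# ['detection_boxes', 'detection_classes', 'detection_scores']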
...
@@ -701,16 +695,22 @@ class SSDMetaArch(model.DetectionModel):
         batch_cls_weights = tf.multiply(batch_sampled_indicator,
                                         batch_cls_weights)
+      losses_mask = None
+      if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
+        losses_mask = tf.stack(self.groundtruth_lists(
+            fields.InputDataFields.is_annotated))
       location_losses = self._localization_loss(
           prediction_dict['box_encodings'],
           batch_reg_targets,
           ignore_nan_targets=True,
-          weights=batch_reg_weights)
+          weights=batch_reg_weights,
+          losses_mask=losses_mask)
       cls_losses = self._classification_loss(
           prediction_dict['class_predictions_with_background'],
           batch_cls_targets,
-          weights=batch_cls_weights)
+          weights=batch_cls_weights,
+          losses_mask=losses_mask)
       if self._expected_classification_loss_under_sampling:
         if cls_losses.get_shape().ndims == 3:
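
The new `losses_mask` stacks the per-image `is_annotated` flags so that losses from unannotated images are zeroed out before the final reduction. A minimal numpy sketch of the same idea (not the loss classes themselves):

import numpy as np

# Per-anchor classification losses for a batch of 3 images, 4 anchors each.
per_image_losses = np.full((3, 4), np.log(2.0), np.float32)
losses_mask = np.array([True, True, False])  # third image is unannotated

masked = per_image_losses * losses_mask[:, np.newaxis].astype(np.float32)
total = masked.sum()  # only the two annotated images contribute
assert np.isclose(total, 2 * 4 * np.log(2.0))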
...
@@ -734,12 +734,6 @@ class SSDMetaArch(model.DetectionModel):
           self._hard_example_miner.summarize()
       else:
         cls_losses = ops.reduce_sum_trailing_dimensions(cls_losses, ndims=2)
-        if self._add_summaries:
-          class_ids = tf.argmax(batch_cls_targets, axis=2)
-          flattened_class_ids = tf.reshape(class_ids, [-1])
-          flattened_classification_losses = tf.reshape(cls_losses, [-1])
-          self._summarize_anchor_classification_loss(
-              flattened_class_ids, flattened_classification_losses)
         localization_loss = tf.reduce_sum(location_losses)
         classification_loss = tf.reduce_sum(cls_losses)
...
research/object_detection/meta_architectures/ssd_meta_arch_test.py
...
@@ -14,105 +14,26 @@
 # ==============================================================================
 """Tests for object_detection.meta_architectures.ssd_meta_arch."""
-import functools
 from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf
-from object_detection.core import anchor_generator
-from object_detection.core import balanced_positive_negative_sampler as sampler
-from object_detection.core import box_list
-from object_detection.core import losses
-from object_detection.core import post_processing
-from object_detection.core import region_similarity_calculator as sim_calc
-from object_detection.core import target_assigner
 from object_detection.meta_architectures import ssd_meta_arch
-from object_detection.utils import ops
-from object_detection.utils import test_case
+from object_detection.meta_architectures import ssd_meta_arch_test_lib
 from object_detection.utils import test_utils

 slim = tf.contrib.slim
 keras = tf.keras.layers

-class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
-
-  def __init__(self):
-    super(FakeSSDFeatureExtractor, self).__init__(
-        is_training=True,
-        depth_multiplier=0,
-        min_depth=0,
-        pad_to_multiple=1,
-        conv_hyperparams_fn=None)
-
-  def preprocess(self, resized_inputs):
-    return tf.identity(resized_inputs)
-
-  def extract_features(self, preprocessed_inputs):
-    with tf.variable_scope('mock_model'):
-      features = slim.conv2d(inputs=preprocessed_inputs, num_outputs=32,
-                             kernel_size=1, scope='layer1')
-      return [features]
-
-
-class FakeSSDKerasFeatureExtractor(ssd_meta_arch.SSDKerasFeatureExtractor):
-
-  def __init__(self):
-    with tf.name_scope('mock_model'):
-      super(FakeSSDKerasFeatureExtractor, self).__init__(
-          is_training=True,
-          depth_multiplier=0,
-          min_depth=0,
-          pad_to_multiple=1,
-          conv_hyperparams_config=None,
-          freeze_batchnorm=False,
-          inplace_batchnorm_update=False,
-      )
-      self._conv = keras.Conv2D(filters=32, kernel_size=1, name='layer1')
-
-  def preprocess(self, resized_inputs):
-    return tf.identity(resized_inputs)
-
-  def _extract_features(self, preprocessed_inputs, **kwargs):
-    with tf.name_scope('mock_model'):
-      return [self._conv(preprocessed_inputs)]
-
-
-class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
-  """Sets up a simple 2x2 anchor grid on the unit square."""
-
-  def name_scope(self):
-    return 'MockAnchorGenerator'
-
-  def num_anchors_per_location(self):
-    return [1]
-
-  def _generate(self, feature_map_shape_list, im_height, im_width):
-    return [box_list.BoxList(
-        tf.constant([[0, 0, .5, .5],
-                     [0, .5, .5, 1],
-                     [.5, 0, 1, .5],
-                     [1., 1., 1.5, 1.5]  # Anchor that is outside clip_window.
-                    ], tf.float32))]
-
-  def num_anchors(self):
-    return 4
-
-
-def _get_value_for_matching_key(dictionary, suffix):
-  for key in dictionary.keys():
-    if key.endswith(suffix):
-      return dictionary[key]
-  raise ValueError('key not found {}'.format(suffix))
-

 @parameterized.parameters(
     {'use_keras': False},
     {'use_keras': True},
 )
-class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
+class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
+                      parameterized.TestCase):

   def _create_model(self,
                     apply_hard_mining=True,
...
@@ -123,96 +44,25 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
                     use_expected_classification_loss_under_sampling=False,
                     minimum_negative_sampling=1,
                     desired_negative_sampling_ratio=3,
-                    use_keras=False):
-    is_training = False
-    num_classes = 1
-    mock_anchor_generator = MockAnchorGenerator2x2()
-    if use_keras:
-      mock_box_predictor = test_utils.MockKerasBoxPredictor(
-          is_training, num_classes)
-    else:
-      mock_box_predictor = test_utils.MockBoxPredictor(
-          is_training, num_classes)
-    mock_box_coder = test_utils.MockBoxCoder()
-    if use_keras:
-      fake_feature_extractor = FakeSSDKerasFeatureExtractor()
-    else:
-      fake_feature_extractor = FakeSSDFeatureExtractor()
-    mock_matcher = test_utils.MockMatcher()
-    region_similarity_calculator = sim_calc.IouSimilarity()
-    encode_background_as_zeros = False
-
-    def image_resizer_fn(image):
-      return [tf.identity(image), tf.shape(image)]
-
-    classification_loss = losses.WeightedSigmoidClassificationLoss()
-    localization_loss = losses.WeightedSmoothL1LocalizationLoss()
-    non_max_suppression_fn = functools.partial(
-        post_processing.batch_multiclass_non_max_suppression,
-        score_thresh=-20.0,
-        iou_thresh=1.0,
-        max_size_per_class=5,
-        max_total_size=5)
-    classification_loss_weight = 1.0
-    localization_loss_weight = 1.0
-    negative_class_weight = 1.0
-    normalize_loss_by_num_matches = False
-
-    hard_example_miner = None
-    if apply_hard_mining:
-      # This hard example miner is expected to be a no-op.
-      hard_example_miner = losses.HardExampleMiner(
-          num_hard_examples=None, iou_threshold=1.0)
-
-    random_example_sampler = None
-    if random_example_sampling:
-      random_example_sampler = sampler.BalancedPositiveNegativeSampler(
-          positive_fraction=0.5)
-
-    target_assigner_instance = target_assigner.TargetAssigner(
-        region_similarity_calculator,
-        mock_matcher,
-        mock_box_coder,
-        negative_class_weight=negative_class_weight,
-        weight_regression_loss_by_score=weight_regression_loss_by_score)
-
-    expected_classification_loss_under_sampling = None
-    if use_expected_classification_loss_under_sampling:
-      expected_classification_loss_under_sampling = functools.partial(
-          ops.expected_classification_loss_under_sampling,
-          minimum_negative_sampling=minimum_negative_sampling,
-          desired_negative_sampling_ratio=desired_negative_sampling_ratio)
-
-    code_size = 4
-    model = ssd_meta_arch.SSDMetaArch(
-        is_training,
-        mock_anchor_generator,
-        mock_box_predictor,
-        mock_box_coder,
-        fake_feature_extractor,
-        mock_matcher,
-        region_similarity_calculator,
-        encode_background_as_zeros,
-        negative_class_weight,
-        image_resizer_fn,
-        non_max_suppression_fn,
-        tf.identity,
-        classification_loss,
-        localization_loss,
-        classification_loss_weight,
-        localization_loss_weight,
-        normalize_loss_by_num_matches,
-        hard_example_miner,
-        target_assigner_instance=target_assigner_instance,
-        add_summaries=False,
-        normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
-        freeze_batchnorm=False,
-        inplace_batchnorm_update=False,
-        add_background_class=add_background_class,
-        random_example_sampler=random_example_sampler,
-        expected_classification_loss_under_sampling=
-        expected_classification_loss_under_sampling)
-    return model, num_classes, mock_anchor_generator.num_anchors(), code_size
+                    use_keras=False,
+                    predict_mask=False,
+                    use_static_shapes=False,
+                    nms_max_size_per_class=5):
+    return super(SsdMetaArchTest, self)._create_model(
+        model_fn=ssd_meta_arch.SSDMetaArch,
+        apply_hard_mining=apply_hard_mining,
+        normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
+        add_background_class=add_background_class,
+        random_example_sampling=random_example_sampling,
+        weight_regression_loss_by_score=weight_regression_loss_by_score,
+        use_expected_classification_loss_under_sampling=
+        use_expected_classification_loss_under_sampling,
+        minimum_negative_sampling=minimum_negative_sampling,
+        desired_negative_sampling_ratio=desired_negative_sampling_ratio,
+        use_keras=use_keras,
+        predict_mask=predict_mask,
+        use_static_shapes=use_static_shapes,
+        nms_max_size_per_class=nms_max_size_per_class)

   def test_preprocess_preserves_shapes_with_dynamic_input_image(
       self, use_keras):
...
@@ -360,6 +210,7 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
     self.assertAllClose(detections_out['num_detections'],
                         expected_num_detections)

   def test_loss_results_are_correct(self, use_keras):
+
     with tf.Graph().as_default():
...
@@ -374,9 +225,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
       prediction_dict = model.predict(preprocessed_tensor,
                                       true_image_shapes=None)
       loss_dict = model.loss(prediction_dict, true_image_shapes=None)
       return (
-          _get_value_for_matching_key(loss_dict, 'Loss/localization_loss'),
-          _get_value_for_matching_key(loss_dict, 'Loss/classification_loss'))
+          self._get_value_for_matching_key(loss_dict,
+                                           'Loss/localization_loss'),
+          self._get_value_for_matching_key(loss_dict,
+                                           'Loss/classification_loss'))

     batch_size = 2
     preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
...
@@ -413,7 +265,8 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
       prediction_dict = model.predict(preprocessed_tensor,
                                       true_image_shapes=None)
       loss_dict = model.loss(prediction_dict, true_image_shapes=None)
-      return (_get_value_for_matching_key(loss_dict,
-                                          'Loss/localization_loss'),)
+      return (self._get_value_for_matching_key(loss_dict,
+                                               'Loss/localization_loss'),)

     batch_size = 2
     preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
...
@@ -443,9 +296,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
       prediction_dict = model.predict(preprocessed_tensor,
                                       true_image_shapes=None)
       loss_dict = model.loss(prediction_dict, true_image_shapes=None)
       return (
-          _get_value_for_matching_key(loss_dict, 'Loss/localization_loss'),
-          _get_value_for_matching_key(loss_dict, 'Loss/classification_loss'))
+          self._get_value_for_matching_key(loss_dict,
+                                           'Loss/localization_loss'),
+          self._get_value_for_matching_key(loss_dict,
+                                           'Loss/classification_loss'))

     batch_size = 2
     preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
...
@@ -591,6 +445,55 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
     self.assertAllClose(localization_loss, expected_localization_loss)
     self.assertAllClose(classification_loss, expected_classification_loss)

+  def test_loss_results_are_correct_with_losses_mask(self, use_keras):
+
+    with tf.Graph().as_default():
+      _, num_classes, num_anchors, _ = self._create_model(use_keras=use_keras)
+
+    def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2,
+                 groundtruth_boxes3, groundtruth_classes1,
+                 groundtruth_classes2, groundtruth_classes3):
+      groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2,
+                                groundtruth_boxes3]
+      groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2,
+                                  groundtruth_classes3]
+      is_annotated_list = [tf.constant(True), tf.constant(True),
+                           tf.constant(False)]
+      model, _, _, _ = self._create_model(apply_hard_mining=False)
+      model.provide_groundtruth(groundtruth_boxes_list,
+                                groundtruth_classes_list,
+                                is_annotated_list=is_annotated_list)
+      prediction_dict = model.predict(preprocessed_tensor,
+                                      true_image_shapes=None)
+      loss_dict = model.loss(prediction_dict, true_image_shapes=None)
+      return (self._get_value_for_matching_key(loss_dict,
+                                               'Loss/localization_loss'),
+              self._get_value_for_matching_key(loss_dict,
+                                               'Loss/classification_loss'))
+
+    batch_size = 3
+    preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
+    groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32)
+    groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32)
+    groundtruth_boxes3 = np.array([[0, 0, .5, .5]], dtype=np.float32)
+    groundtruth_classes1 = np.array([[1]], dtype=np.float32)
+    groundtruth_classes2 = np.array([[1]], dtype=np.float32)
+    groundtruth_classes3 = np.array([[1]], dtype=np.float32)
+    expected_localization_loss = 0.0
+    # Note that we are subtracting 1 from batch_size, since the final image is
+    # not annotated.
+    expected_classification_loss = ((batch_size - 1) * num_anchors *
+                                    (num_classes + 1) * np.log(2.0))
+    (localization_loss, classification_loss) = self.execute(
+        graph_fn, [
+            preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
+            groundtruth_boxes3, groundtruth_classes1, groundtruth_classes2,
+            groundtruth_classes3
+        ])
+    self.assertAllClose(localization_loss, expected_localization_loss)
+    self.assertAllClose(classification_loss, expected_classification_loss)
+
   def test_restore_map_for_detection_ckpt(self, use_keras):
     model, _, _, _ = self._create_model(use_keras=use_keras)
     model.predict(tf.constant(np.array([[[[0, 0], [1, 1]], [[1, 0], [0, 1]]]],
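
The expected value in the new losses-mask test follows directly from the sigmoid classification loss at zero logits: each of the (num_classes + 1) class columns on each anchor contributes log 2, and the third, unannotated image is masked out. A quick check of that arithmetic:

import numpy as np

batch_size, num_anchors, num_classes = 3, 4, 1
annotated_images = batch_size - 1  # is_annotated_list masks one image out
expected = annotated_images * num_anchors * (num_classes + 1) * np.log(2.0)
print(expected)  # ~11.09, the value assertAllClose compares against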
...
@@ -678,10 +581,8 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
                                                           use_keras):
     with tf.Graph().as_default():
-      _, num_classes, num_anchors, _ = self._create_model(
+      _, num_classes, _, _ = self._create_model(
           random_example_sampling=True, use_keras=use_keras)
-      print num_classes, num_anchors

     def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2,
                  groundtruth_classes1, groundtruth_classes2):
...
@@ -694,9 +595,10 @@ class SsdMetaArchTest(test_case.TestCase, parameterized.TestCase):
       prediction_dict = model.predict(
           preprocessed_tensor, true_image_shapes=None)
       loss_dict = model.loss(prediction_dict, true_image_shapes=None)
-      return (_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'),
-              _get_value_for_matching_key(loss_dict,
-                                          'Loss/classification_loss'))
+      return (self._get_value_for_matching_key(loss_dict,
+                                               'Loss/localization_loss'),
+              self._get_value_for_matching_key(loss_dict,
+                                               'Loss/classification_loss'))

     batch_size = 2
     preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
...
research/object_detection/meta_architectures/ssd_meta_arch_test_lib.py
new file mode 100644
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions for SSD models meta architecture tests."""

import functools
import tensorflow as tf

from object_detection.core import anchor_generator
from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import box_list
from object_detection.core import losses
from object_detection.core import post_processing
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import target_assigner
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.utils import ops
from object_detection.utils import test_case
from object_detection.utils import test_utils

slim = tf.contrib.slim
keras = tf.keras.layers


class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
  """Fake ssd feature extracture for ssd meta arch tests."""

  def __init__(self):
    super(FakeSSDFeatureExtractor, self).__init__(
        is_training=True,
        depth_multiplier=0,
        min_depth=0,
        pad_to_multiple=1,
        conv_hyperparams_fn=None)

  def preprocess(self, resized_inputs):
    return tf.identity(resized_inputs)

  def extract_features(self, preprocessed_inputs):
    with tf.variable_scope('mock_model'):
      features = slim.conv2d(
          inputs=preprocessed_inputs, num_outputs=32, kernel_size=1,
          scope='layer1')
      return [features]


class FakeSSDKerasFeatureExtractor(ssd_meta_arch.SSDKerasFeatureExtractor):
  """Fake keras based ssd feature extracture for ssd meta arch tests."""

  def __init__(self):
    with tf.name_scope('mock_model'):
      super(FakeSSDKerasFeatureExtractor, self).__init__(
          is_training=True,
          depth_multiplier=0,
          min_depth=0,
          pad_to_multiple=1,
          conv_hyperparams=None,
          freeze_batchnorm=False,
          inplace_batchnorm_update=False,
      )
      self._conv = keras.Conv2D(filters=32, kernel_size=1, name='layer1')

  def preprocess(self, resized_inputs):
    return tf.identity(resized_inputs)

  def _extract_features(self, preprocessed_inputs, **kwargs):
    with tf.name_scope('mock_model'):
      return [self._conv(preprocessed_inputs)]


class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
  """A simple 2x2 anchor grid on the unit square used for test only."""

  def name_scope(self):
    return 'MockAnchorGenerator'

  def num_anchors_per_location(self):
    return [1]

  def _generate(self, feature_map_shape_list, im_height, im_width):
    return [
        box_list.BoxList(
            tf.constant(
                [
                    [0, 0, .5, .5],
                    [0, .5, .5, 1],
                    [.5, 0, 1, .5],
                    [1., 1., 1.5, 1.5]  # Anchor that is outside clip_window.
                ],
                tf.float32))
    ]

  def num_anchors(self):
    return 4


class SSDMetaArchTestBase(test_case.TestCase):
  """Base class to test SSD based meta architectures."""

  def _create_model(self,
                    model_fn=ssd_meta_arch.SSDMetaArch,
                    apply_hard_mining=True,
                    normalize_loc_loss_by_codesize=False,
                    add_background_class=True,
                    random_example_sampling=False,
                    weight_regression_loss_by_score=False,
                    use_expected_classification_loss_under_sampling=False,
                    minimum_negative_sampling=1,
                    desired_negative_sampling_ratio=3,
                    use_keras=False,
                    predict_mask=False,
                    use_static_shapes=False,
                    nms_max_size_per_class=5):
    is_training = False
    num_classes = 1
    mock_anchor_generator = MockAnchorGenerator2x2()
    if use_keras:
      mock_box_predictor = test_utils.MockKerasBoxPredictor(
          is_training, num_classes, predict_mask=predict_mask)
    else:
      mock_box_predictor = test_utils.MockBoxPredictor(
          is_training, num_classes, predict_mask=predict_mask)
    mock_box_coder = test_utils.MockBoxCoder()
    if use_keras:
      fake_feature_extractor = FakeSSDKerasFeatureExtractor()
    else:
      fake_feature_extractor = FakeSSDFeatureExtractor()
    mock_matcher = test_utils.MockMatcher()
    region_similarity_calculator = sim_calc.IouSimilarity()
    encode_background_as_zeros = False

    def image_resizer_fn(image):
      return [tf.identity(image), tf.shape(image)]

    classification_loss = losses.WeightedSigmoidClassificationLoss()
    localization_loss = losses.WeightedSmoothL1LocalizationLoss()
    non_max_suppression_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=-20.0,
        iou_thresh=1.0,
        max_size_per_class=nms_max_size_per_class,
        max_total_size=nms_max_size_per_class,
        use_static_shapes=use_static_shapes)
    classification_loss_weight = 1.0
    localization_loss_weight = 1.0
    negative_class_weight = 1.0
    normalize_loss_by_num_matches = False

    hard_example_miner = None
    if apply_hard_mining:
      # This hard example miner is expected to be a no-op.
      hard_example_miner = losses.HardExampleMiner(
          num_hard_examples=None, iou_threshold=1.0)

    random_example_sampler = None
    if random_example_sampling:
      random_example_sampler = sampler.BalancedPositiveNegativeSampler(
          positive_fraction=0.5)

    target_assigner_instance = target_assigner.TargetAssigner(
        region_similarity_calculator,
        mock_matcher,
        mock_box_coder,
        negative_class_weight=negative_class_weight,
        weight_regression_loss_by_score=weight_regression_loss_by_score)

    expected_classification_loss_under_sampling = None
    if use_expected_classification_loss_under_sampling:
      expected_classification_loss_under_sampling = functools.partial(
          ops.expected_classification_loss_under_sampling,
          minimum_negative_sampling=minimum_negative_sampling,
          desired_negative_sampling_ratio=desired_negative_sampling_ratio)

    code_size = 4
    model = model_fn(
        is_training=is_training,
        anchor_generator=mock_anchor_generator,
        box_predictor=mock_box_predictor,
        box_coder=mock_box_coder,
        feature_extractor=fake_feature_extractor,
        encode_background_as_zeros=encode_background_as_zeros,
        image_resizer_fn=image_resizer_fn,
        non_max_suppression_fn=non_max_suppression_fn,
        score_conversion_fn=tf.identity,
        classification_loss=classification_loss,
        localization_loss=localization_loss,
        classification_loss_weight=classification_loss_weight,
        localization_loss_weight=localization_loss_weight,
        normalize_loss_by_num_matches=normalize_loss_by_num_matches,
        hard_example_miner=hard_example_miner,
        target_assigner_instance=target_assigner_instance,
        add_summaries=False,
        normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
        freeze_batchnorm=False,
        inplace_batchnorm_update=False,
        add_background_class=add_background_class,
        random_example_sampler=random_example_sampler,
        expected_classification_loss_under_sampling=
        expected_classification_loss_under_sampling)
    return model, num_classes, mock_anchor_generator.num_anchors(), code_size

  def _get_value_for_matching_key(self, dictionary, suffix):
    for key in dictionary.keys():
      if key.endswith(suffix):
        return dictionary[key]
    raise ValueError('key not found {}'.format(suffix))


if __name__ == '__main__':
  tf.test.main()
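
A sketch of how a concrete test might reuse this new base class (hypothetical subclass; the real consumer is ssd_meta_arch_test.py above):

from object_detection.meta_architectures import ssd_meta_arch
from object_detection.meta_architectures import ssd_meta_arch_test_lib


class MySSDVariantTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase):

  def test_model_builds(self):
    # _create_model returns (model, num_classes, num_anchors, code_size);
    # model_fn could be swapped for a different SSD-style meta architecture.
    model, num_classes, num_anchors, code_size = self._create_model(
        model_fn=ssd_meta_arch.SSDMetaArch, apply_hard_mining=False)
    self.assertEqual((num_classes, num_anchors, code_size), (1, 4, 4))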
research/object_detection/metrics/coco_evaluation.py
...
@@ -18,6 +18,7 @@ import tensorflow as tf
 from object_detection.core import standard_fields
 from object_detection.metrics import coco_tools
+from object_detection.utils import json_utils
 from object_detection.utils import object_detection_evaluation
...
@@ -148,6 +149,19 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
             detection_classes]))
     self._image_ids[image_id] = True

+  def dump_detections_to_json_file(self, json_output_path):
+    """Saves the detections into json_output_path in the format used by MS COCO.
+
+    Args:
+      json_output_path: String containing the output file's path. It can also
+        be None, in which case nothing is written to the output file.
+    """
+    if json_output_path and json_output_path is not None:
+      with tf.gfile.GFile(json_output_path, 'w') as fid:
+        tf.logging.info('Dumping detections to output json file.')
+        json_utils.Dump(
+            obj=self._detection_boxes_list, fid=fid, float_digits=4, indent=2)
+
   def evaluate(self):
     """Evaluates the detection boxes and returns a dictionary of coco metrics.
...
@@ -245,10 +259,11 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
         detection_boxes_batched, detection_scores_batched,
         detection_classes_batched, num_det_boxes_per_image):
       self.add_single_ground_truth_image_info(
           image_id,
           {
               'groundtruth_boxes': gt_box[:num_gt_box],
               'groundtruth_classes': gt_class[:num_gt_box],
-              'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]})
+              'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
+          })
       self.add_single_detected_image_info(
           image_id,
           {'detection_boxes': det_box[:num_det_box],
...
@@ -268,8 +283,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
     detection_classes = eval_dict[detection_fields.detection_classes]
     num_gt_boxes_per_image = eval_dict.get(
         'num_groundtruth_boxes_per_image', None)
-    num_det_boxes_per_image = eval_dict.get(
-        'num_det_boxes_per_image', None)
+    num_det_boxes_per_image = eval_dict.get(
+        'num_groundtruth_boxes_per_image', None)
     if groundtruth_is_crowd is None:
       groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
...
@@ -491,6 +505,19 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
             detection_classes]))
     self._image_ids_with_detections.update([image_id])

+  def dump_detections_to_json_file(self, json_output_path):
+    """Saves the detections into json_output_path in the format used by MS COCO.
+
+    Args:
+      json_output_path: String containing the output file's path. It can also
+        be None, in which case nothing is written to the output file.
+    """
+    if json_output_path and json_output_path is not None:
+      tf.logging.info('Dumping detections to output json file.')
+      with tf.gfile.GFile(json_output_path, 'w') as fid:
+        json_utils.Dump(
+            obj=self._detection_masks_list, fid=fid, float_digits=4, indent=2)
+
   def evaluate(self):
     """Evaluates the detection masks and returns a dictionary of coco metrics.
...
research/object_detection/metrics/coco_evaluation_test.py
...
@@ -24,14 +24,25 @@ from object_detection.core import standard_fields
 from object_detection.metrics import coco_evaluation


+def _get_categories_list():
+  return [{
+      'id': 1,
+      'name': 'person'
+  }, {
+      'id': 2,
+      'name': 'dog'
+  }, {
+      'id': 3,
+      'name': 'cat'
+  }]
+
+
 class CocoDetectionEvaluationTest(tf.test.TestCase):

   def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
     """Tests that mAP is calculated correctly on GT and Detections."""
-    category_list = [{'id': 0, 'name': 'person'},
-                     {'id': 1, 'name': 'cat'},
-                     {'id': 2, 'name': 'dog'}]
-    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
     coco_evaluator.add_single_ground_truth_image_info(
         image_id='image1',
         groundtruth_dict={
...
@@ -88,17 +99,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
   def testGetOneMAPWithMatchingGroundtruthAndDetectionsSkipCrowd(self):
     """Tests computing mAP with is_crowd GT boxes skipped."""
-    category_list = [{
-        'id': 0,
-        'name': 'person'
-    }, {
-        'id': 1,
-        'name': 'cat'
-    }, {
-        'id': 2,
-        'name': 'dog'
-    }]
-    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
     coco_evaluator.add_single_ground_truth_image_info(
         image_id='image1',
         groundtruth_dict={
...
@@ -124,17 +126,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
   def testGetOneMAPWithMatchingGroundtruthAndDetectionsEmptyCrowd(self):
     """Tests computing mAP with empty is_crowd array passed in."""
-    category_list = [{
-        'id': 0,
-        'name': 'person'
-    }, {
-        'id': 1,
-        'name': 'cat'
-    }, {
-        'id': 2,
-        'name': 'dog'
-    }]
-    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
     coco_evaluator.add_single_ground_truth_image_info(
         image_id='image1',
         groundtruth_dict={
...
@@ -160,11 +153,9 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
   def testRejectionOnDuplicateGroundtruth(self):
     """Tests that groundtruth cannot be added more than once for an image."""
-    categories = [{'id': 1, 'name': 'cat'},
-                  {'id': 2, 'name': 'dog'},
-                  {'id': 3, 'name': 'elephant'}]
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
     # Add groundtruth
-    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories)
     image_key1 = 'img1'
     groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
                                   dtype=float)
...
@@ -189,11 +180,9 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
   def testRejectionOnDuplicateDetections(self):
     """Tests that detections cannot be added more than once for an image."""
-    categories = [{'id': 1, 'name': 'cat'},
-                  {'id': 2, 'name': 'dog'},
-                  {'id': 3, 'name': 'elephant'}]
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
     # Add groundtruth
-    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories)
     coco_evaluator.add_single_ground_truth_image_info(
         image_id='image1',
         groundtruth_dict={
...
@@ -227,10 +216,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
   def testExceptionRaisedWithMissingGroundtruth(self):
     """Tests that exception is raised for detection with missing groundtruth."""
-    categories = [{'id': 1, 'name': 'cat'},
-                  {'id': 2, 'name': 'dog'},
-                  {'id': 3, 'name': 'elephant'}]
-    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories)
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
     with self.assertRaises(ValueError):
       coco_evaluator.add_single_detected_image_info(
           image_id='image1',
...
@@ -247,10 +234,8 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
 class CocoEvaluationPyFuncTest(tf.test.TestCase):

   def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
-    category_list = [{'id': 0, 'name': 'person'},
-                     {'id': 1, 'name': 'cat'},
-                     {'id': 2, 'name': 'dog'}]
-    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
     image_id = tf.placeholder(tf.string, shape=())
     groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
     groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
...
@@ -310,31 +295,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
-    self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-                           -1.0)
+    self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
+                           1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
-    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-                           -1.0)
+    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
+                           1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
     self.assertFalse(coco_evaluator._groundtruth_list)
     self.assertFalse(coco_evaluator._detection_boxes_list)
     self.assertFalse(coco_evaluator._image_ids)

   def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self):
-    category_list = [{
-        'id': 0,
-        'name': 'person'
-    }, {
-        'id': 1,
-        'name': 'cat'
-    }, {
-        'id': 2,
-        'name': 'dog'
-    }]
-    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
     image_id = tf.placeholder(tf.string, shape=())
     groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
     groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
...
@@ -415,24 +391,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
-    self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-                           -1.0)
+    self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
+                           1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
-    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75)
+    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.83333331)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
-    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-                           -1.0)
+    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
+                           1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
     self.assertFalse(coco_evaluator._groundtruth_list)
     self.assertFalse(coco_evaluator._detection_boxes_list)
     self.assertFalse(coco_evaluator._image_ids)

   def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
-    category_list = [{'id': 0, 'name': 'person'},
-                     {'id': 1, 'name': 'cat'},
-                     {'id': 2, 'name': 'dog'}]
-    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
     batch_size = 3
     image_id = tf.placeholder(tf.string, shape=(batch_size))
     groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
...
@@ -479,24 +453,22 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
-    self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-                           -1.0)
+    self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
+                           1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
-    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-                           -1.0)
+    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
+                           1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
     self.assertFalse(coco_evaluator._groundtruth_list)
     self.assertFalse(coco_evaluator._detection_boxes_list)
     self.assertFalse(coco_evaluator._image_ids)

   def testGetOneMAPWithMatchingGroundtruthAndDetectionsPaddedBatches(self):
-    category_list = [{'id': 0, 'name': 'person'},
-                     {'id': 1, 'name': 'cat'},
-                     {'id': 2, 'name': 'dog'}]
-    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
     batch_size = 3
     image_id = tf.placeholder(tf.string, shape=(batch_size))
     groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
...
@@ -525,27 +497,40 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
     _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']

     with self.test_session() as sess:
-      sess.run(update_op,
-               feed_dict={
-                   image_id: ['image1', 'image2', 'image3'],
-                   groundtruth_boxes: np.array([[[100., 100., 200., 200.],
-                                                 [-1, -1, -1, -1]],
-                                                [[50., 50., 100., 100.],
-                                                 [-1, -1, -1, -1]],
-                                                [[25., 25., 50., 50.],
-                                                 [10., 10., 15., 15.]]]),
-                   groundtruth_classes: np.array([[1, -1], [3, -1], [2, 2]]),
-                   num_gt_boxes_per_image: np.array([1, 1, 2]),
-                   detection_boxes: np.array([[[100., 100., 200., 200.],
-                                               [0., 0., 0., 0.]],
-                                              [[50., 50., 100., 100.],
-                                               [0., 0., 0., 0.]],
-                                              [[25., 25., 50., 50.],
-                                               [10., 10., 15., 15.]]]),
-                   detection_scores: np.array([[.8, 0.], [.7, 0.], [.95, .9]]),
-                   detection_classes: np.array([[1, -1], [3, -1], [2, 2]]),
-                   num_det_boxes_per_image: np.array([1, 1, 2]),
-               })
+      sess.run(
+          update_op,
+          feed_dict={
+              image_id: ['image1', 'image2', 'image3'],
+              groundtruth_boxes:
+                  np.array([[[100., 100., 200., 200.], [-1, -1, -1, -1]],
+                            [[50., 50., 100., 100.], [-1, -1, -1, -1]],
+                            [[25., 25., 50., 50.], [10., 10., 15., 15.]]]),
+              groundtruth_classes:
+                  np.array([[1, -1], [3, -1], [2, 2]]),
+              num_gt_boxes_per_image:
+                  np.array([1, 1, 2]),
+              detection_boxes:
+                  np.array([[[100., 100., 200., 200.],
+                             [0., 0., 0., 0.],
+                             [0., 0., 0., 0.]],
+                            [[50., 50., 100., 100.],
+                             [0., 0., 0., 0.],
+                             [0., 0., 0., 0.]],
+                            [[25., 25., 50., 50.],
+                             [10., 10., 15., 15.],
+                             [10., 10., 15., 15.]]]),
+              detection_scores:
+                  np.array([[.8, 0., 0.], [.7, 0., 0.], [.95, .9, 0.9]]),
+              detection_classes:
+                  np.array([[1, -1, -1], [3, -1, -1], [2, 2, 2]]),
+              num_det_boxes_per_image:
+                  np.array([1, 1, 3]),
+          })
+
+    # Check the number of bounding boxes added.
+    self.assertEqual(len(coco_evaluator._groundtruth_list), 4)
+    self.assertEqual(len(coco_evaluator._detection_boxes_list), 5)

     metrics = {}
     for key, (value_op, _) in eval_metric_ops.iteritems():
       metrics[key] = value_op
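
The padded-batch feed above relies on num_gt_boxes_per_image / num_det_boxes_per_image: the per-image update slices away padding before handing data to the evaluator, as in the gt_box[:num_gt_box] lines earlier in this commit. A small numpy sketch of that slicing:

import numpy as np

gt_box = np.array([[25., 25., 50., 50.],
                   [10., 10., 15., 15.],
                   [-1., -1., -1., -1.]])  # last row is padding
num_gt_box = 2
print(gt_box[:num_gt_box])  # only the two real boxes survive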
...
@@ -555,14 +540,14 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
-    self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
-                           -1.0)
+    self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
+                           1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
-    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75)
+    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.83333331)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
-    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
-                           -1.0)
+    self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
+                           1.0)
     self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
     self.assertFalse(coco_evaluator._groundtruth_list)
     self.assertFalse(coco_evaluator._detection_boxes_list)
...
@@ -572,10 +557,7 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
...
@@ -572,10 +557,7 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
class
CocoMaskEvaluationTest
(
tf
.
test
.
TestCase
):
class
CocoMaskEvaluationTest
(
tf
.
test
.
TestCase
):
def
testGetOneMAPWithMatchingGroundtruthAndDetections
(
self
):
def
testGetOneMAPWithMatchingGroundtruthAndDetections
(
self
):
category_list
=
[{
'id'
:
0
,
'name'
:
'person'
},
coco_evaluator
=
coco_evaluation
.
CocoMaskEvaluator
(
_get_categories_list
())
{
'id'
:
1
,
'name'
:
'cat'
},
{
'id'
:
2
,
'name'
:
'dog'
}]
coco_evaluator
=
coco_evaluation
.
CocoMaskEvaluator
(
category_list
)
coco_evaluator
.
add_single_ground_truth_image_info
(
coco_evaluator
.
add_single_ground_truth_image_info
(
image_id
=
'image1'
,
image_id
=
'image1'
,
groundtruth_dict
=
{
groundtruth_dict
=
{
...
@@ -657,10 +639,7 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
...
@@ -657,10 +639,7 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
class
CocoMaskEvaluationPyFuncTest
(
tf
.
test
.
TestCase
):
class
CocoMaskEvaluationPyFuncTest
(
tf
.
test
.
TestCase
):
def
testGetOneMAPWithMatchingGroundtruthAndDetections
(
self
):
def
testGetOneMAPWithMatchingGroundtruthAndDetections
(
self
):
category_list
=
[{
'id'
:
0
,
'name'
:
'person'
},
coco_evaluator
=
coco_evaluation
.
CocoMaskEvaluator
(
_get_categories_list
())
{
'id'
:
1
,
'name'
:
'cat'
},
{
'id'
:
2
,
'name'
:
'dog'
}]
coco_evaluator
=
coco_evaluation
.
CocoMaskEvaluator
(
category_list
)
image_id
=
tf
.
placeholder
(
tf
.
string
,
shape
=
())
image_id
=
tf
.
placeholder
(
tf
.
string
,
shape
=
())
groundtruth_boxes
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
4
))
groundtruth_boxes
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
4
))
groundtruth_classes
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
))
groundtruth_classes
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
))
...
@@ -756,5 +735,6 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
...
@@ -756,5 +735,6 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
self
.
assertFalse
(
coco_evaluator
.
_image_id_to_mask_shape_map
)
self
.
assertFalse
(
coco_evaluator
.
_image_id_to_mask_shape_map
)
self
.
assertFalse
(
coco_evaluator
.
_detection_masks_list
)
self
.
assertFalse
(
coco_evaluator
.
_detection_masks_list
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
tf
.
test
.
main
()
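The two mask-evaluator tests above now call a shared module-level helper instead of inlining the category list. The helper's body is not shown in this diff; a plausible shape, inferred from the deleted inline lists (the ids and names are illustrative, not confirmed by the hunk), is:

    def _get_categories_list():
      # Hypothetical reconstruction mirroring the deleted inline lists;
      # the actual helper in coco_evaluation_test.py may use different ids.
      return [{'id': 0, 'name': 'person'},
              {'id': 1, 'name': 'cat'},
              {'id': 2, 'name': 'dog'}]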
research/object_detection/metrics/offline_eval_map_corloc.py  View file @ 27b4acd4
...
@@ -91,10 +91,8 @@ def read_data_and_evaluate(input_config, eval_config):
   if input_config.WhichOneof('input_reader') == 'tf_record_input_reader':
     input_paths = input_config.tf_record_input_reader.input_path

-    label_map = label_map_util.load_labelmap(input_config.label_map_path)
-    max_num_classes = max([item.id for item in label_map.item])
-    categories = label_map_util.convert_label_map_to_categories(
-        label_map, max_num_classes)
+    categories = label_map_util.create_categories_from_labelmap(
+        input_config.label_map_path)

     object_detection_evaluators = evaluator.get_evaluators(
         eval_config, categories)
...
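For scripts that still build categories by hand, the hunk above collapses a three-step sequence into one helper call. A minimal before/after sketch (the label map path is an illustrative placeholder):

    from object_detection.utils import label_map_util

    LABEL_MAP_PATH = 'data/label_map.pbtxt'  # illustrative placeholder

    # Before: load the proto, derive the class count, then convert.
    label_map = label_map_util.load_labelmap(LABEL_MAP_PATH)
    max_num_classes = max([item.id for item in label_map.item])
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes)

    # After: one call performs all three steps.
    categories = label_map_util.create_categories_from_labelmap(LABEL_MAP_PATH)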
research/object_detection/model_lib.py  View file @ 27b4acd4
...
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import copy
 import functools
 import os
...
@@ -43,9 +44,12 @@ MODEL_BUILD_UTIL_MAP = {
         config_util.create_pipeline_proto_from_configs,
     'merge_external_params_with_configs':
         config_util.merge_external_params_with_configs,
-    'create_train_input_fn': inputs.create_train_input_fn,
-    'create_eval_input_fn': inputs.create_eval_input_fn,
-    'create_predict_input_fn': inputs.create_predict_input_fn,
+    'create_train_input_fn':
+        inputs.create_train_input_fn,
+    'create_eval_input_fn':
+        inputs.create_eval_input_fn,
+    'create_predict_input_fn':
+        inputs.create_predict_input_fn,
 }
...
@@ -126,8 +130,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
     ValueError: If unpad_tensors is True and `tensor_dict` does not contain
       `num_groundtruth_boxes` tensor.
   """
-  unbatched_tensor_dict = {key: tf.unstack(tensor)
-                           for key, tensor in tensor_dict.items()}
+  unbatched_tensor_dict = {
+      key: tf.unstack(tensor) for key, tensor in tensor_dict.items()
+  }
   if unpad_groundtruth_tensors:
     if (fields.InputDataFields.num_groundtruth_boxes not in
         unbatched_tensor_dict):
...
@@ -206,8 +211,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
     # Make sure to set the Keras learning phase. True during training,
     # False for inference.
     tf.keras.backend.set_learning_phase(is_training)
-    detection_model = detection_model_fn(is_training=is_training,
-                                         add_summaries=(not use_tpu))
+    detection_model = detection_model_fn(
+        is_training=is_training, add_summaries=(not use_tpu))
     scaffold_fn = None

     if mode == tf.estimator.ModeKeys.TRAIN:
...
@@ -237,6 +242,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
       gt_weights_list = None
       if fields.InputDataFields.groundtruth_weights in labels:
         gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
+      gt_is_crowd_list = None
       if fields.InputDataFields.groundtruth_is_crowd in labels:
         gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
       detection_model.provide_groundtruth(
...
@@ -248,8 +254,18 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
           groundtruth_is_crowd_list=gt_is_crowd_list)

     preprocessed_images = features[fields.InputDataFields.image]
-    prediction_dict = detection_model.predict(
-        preprocessed_images, features[fields.InputDataFields.true_image_shape])
+    if use_tpu and train_config.use_bfloat16:
+      with tf.contrib.tpu.bfloat16_scope():
+        prediction_dict = detection_model.predict(
+            preprocessed_images,
+            features[fields.InputDataFields.true_image_shape])
+        for k, v in prediction_dict.items():
+          if v.dtype == tf.bfloat16:
+            prediction_dict[k] = tf.cast(v, tf.float32)
+    else:
+      prediction_dict = detection_model.predict(
+          preprocessed_images,
+          features[fields.InputDataFields.true_image_shape])
+
     if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
       detections = detection_model.postprocess(
           prediction_dict, features[fields.InputDataFields.true_image_shape])
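The new branch above runs the forward pass under a bfloat16 scope on TPU and then upcasts any bfloat16 outputs, since the loss and postprocessing ops downstream expect float32. A minimal sketch of the cast-back step in isolation (the function name is ours, not the library's):

    import tensorflow as tf

    def cast_bfloat16_to_float32(tensor_dict):
      # Mirrors the loop in the hunk above: float32 tensors pass through,
      # anything produced in bfloat16 is upcast before leaving the scope.
      return {
          k: tf.cast(v, tf.float32) if v.dtype == tf.bfloat16 else v
          for k, v in tensor_dict.items()
      }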
...
@@ -270,13 +286,16 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
             train_config.load_all_detection_checkpoint_vars))
       available_var_map = (
           variables_helper.get_variables_available_in_checkpoint(
               asg_map, train_config.fine_tune_checkpoint,
               include_global_step=False))
       if use_tpu:
+
         def tpu_scaffold():
           tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                         available_var_map)
           return tf.train.Scaffold()
+
         scaffold_fn = tpu_scaffold
       else:
         tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
...
@@ -290,8 +309,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
       regularization_losses = tf.get_collection(
           tf.GraphKeys.REGULARIZATION_LOSSES)
       if regularization_losses:
-        regularization_loss = tf.add_n(regularization_losses,
-                                       name='regularization_loss')
+        regularization_loss = tf.add_n(
+            regularization_losses, name='regularization_loss')
         losses.append(regularization_loss)
         losses_dict['Loss/regularization_loss'] = regularization_loss
       total_loss = tf.add_n(losses, name='total_loss')
...
@@ -353,14 +372,19 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
     eval_metric_ops = None
     scaffold = None
     if mode == tf.estimator.ModeKeys.EVAL:
-      class_agnostic = (fields.DetectionResultFields.detection_classes
-                        not in detections)
+      class_agnostic = (
+          fields.DetectionResultFields.detection_classes not in detections)
       groundtruth = _prepare_groundtruth_for_eval(
           detection_model, class_agnostic)
       use_original_images = fields.InputDataFields.original_image in features
-      eval_images = (
-          features[fields.InputDataFields.original_image] if use_original_images
-          else features[fields.InputDataFields.image])
+      if use_original_images:
+        eval_images = tf.cast(
+            tf.image.resize_bilinear(
+                features[fields.InputDataFields.original_image][0:1],
+                features[fields.InputDataFields.original_image_spatial_shape][0]),
+            tf.uint8)
+      else:
+        eval_images = features[fields.InputDataFields.image]
       eval_dict = eval_util.result_dict_for_single_example(
           eval_images[0:1],
           features[inputs.HASH_KEY][0],
...
@@ -374,28 +398,26 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
       else:
         category_index = label_map_util.create_category_index_from_labelmap(
             eval_input_config.label_map_path)
-      img_summary = None
+      vis_metric_ops = None
       if not use_tpu and use_original_images:
-        detection_and_groundtruth = (
-            vis_utils.draw_side_by_side_evaluation_image(
-                eval_dict, category_index,
-                max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
-                min_score_thresh=eval_config.min_score_threshold,
-                use_normalized_coordinates=False))
-        img_summary = tf.summary.image('Detections_Left_Groundtruth_Right',
-                                       detection_and_groundtruth)
+        eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
+            category_index,
+            max_examples_to_draw=eval_config.num_visualizations,
+            max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
+            min_score_thresh=eval_config.min_score_threshold,
+            use_normalized_coordinates=False)
+        vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(
+            eval_dict)

       # Eval metrics on a single example.
       eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
           eval_config,
           category_index.values(), eval_dict)
       for loss_key, loss_tensor in iter(losses_dict.items()):
         eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
       for var in optimizer_summary_vars:
         eval_metric_ops[var.op.name] = (var, tf.no_op())
-      if img_summary is not None:
-        eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
-            img_summary, tf.no_op())
+      if vis_metric_ops is not None:
+        eval_metric_ops.update(vis_metric_ops)
       eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

       if eval_config.use_moving_averages:
...
@@ -435,12 +457,14 @@ def create_estimator_and_inputs(run_config,
                                 hparams,
                                 pipeline_config_path,
                                 train_steps=None,
-                                eval_steps=None,
+                                sample_1_of_n_eval_examples=1,
+                                sample_1_of_n_eval_on_train_examples=1,
                                 model_fn_creator=create_model_fn,
                                 use_tpu_estimator=False,
                                 use_tpu=False,
                                 num_shards=1,
                                 params=None,
+                                override_eval_num_epochs=True,
                                 **kwargs):
   """Creates `Estimator`, input functions, and steps.
...
@@ -450,8 +474,11 @@ def create_estimator_and_inputs(run_config,
     pipeline_config_path: A path to a pipeline config file.
     train_steps: Number of training steps. If None, the number of training steps
       is set from the `TrainConfig` proto.
-    eval_steps: Number of evaluation steps per evaluation cycle. If None, the
-      number of evaluation steps is set from the `EvalConfig` proto.
+    sample_1_of_n_eval_examples: Integer representing how often an eval example
+      should be sampled. If 1, will sample all examples.
+    sample_1_of_n_eval_on_train_examples: Similar to
+      `sample_1_of_n_eval_examples`, except controls the sampling of training
+      data for evaluation.
     model_fn_creator: A function that creates a `model_fn` for `Estimator`.
       Follows the signature:
...
@@ -470,19 +497,20 @@ def create_estimator_and_inputs(run_config,
       is True.
     params: Parameter dictionary passed from the estimator. Only used if
       `use_tpu_estimator` is True.
+    override_eval_num_epochs: Whether to overwrite the number of epochs to
+      1 for eval_input.
     **kwargs: Additional keyword arguments for configuration override.

   Returns:
     A dictionary with the following fields:
     'estimator': An `Estimator` or `TPUEstimator`.
     'train_input_fn': A training input function.
-    'eval_input_fn': An evaluation input function.
+    'eval_input_fns': A list of all evaluation input functions.
+    'eval_input_names': A list of names for each evaluation input.
     'eval_on_train_input_fn': An evaluation-on-train input function.
     'predict_input_fn': A prediction input function.
     'train_steps': Number of training steps. Either directly from input or from
       configuration.
-    'eval_steps': Number of evaluation steps. Either directly from input or from
-      configuration.
   """
   get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
       'get_configs_from_pipeline_file']
...
@@ -495,27 +523,36 @@ def create_estimator_and_inputs(run_config,
   create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn']

   configs = get_configs_from_pipeline_file(pipeline_config_path)
+  kwargs.update({
+      'train_steps': train_steps,
+      'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
+  })
+  if override_eval_num_epochs:
+    kwargs.update({'eval_num_epochs': 1})
+    tf.logging.warning(
+        'Forced number of epochs for all eval validations to be 1.')
   configs = merge_external_params_with_configs(
-      configs,
-      hparams,
-      train_steps=train_steps,
-      eval_steps=eval_steps,
-      retain_original_images_in_eval=False if use_tpu else True,
-      **kwargs)
+      configs, hparams, kwargs_dict=kwargs)
   model_config = configs['model']
   train_config = configs['train_config']
   train_input_config = configs['train_input_config']
   eval_config = configs['eval_config']
-  eval_input_config = configs['eval_input_config']
+  eval_input_configs = configs['eval_input_configs']
+  eval_on_train_input_config = copy.deepcopy(train_input_config)
+  eval_on_train_input_config.sample_1_of_n_examples = (
+      sample_1_of_n_eval_on_train_examples)
+  if override_eval_num_epochs and eval_on_train_input_config.num_epochs != 1:
+    tf.logging.warning('Expected number of evaluation epochs is 1, but '
+                       'instead encountered `eval_on_train_input_config'
+                       '.num_epochs` = '
+                       '{}. Overwriting `num_epochs` to 1.'.format(
+                           eval_on_train_input_config.num_epochs))
+    eval_on_train_input_config.num_epochs = 1

   # update train_steps from config but only when non-zero value is provided
   if train_steps is None and train_config.num_steps != 0:
     train_steps = train_config.num_steps
-  # update eval_steps from config but only when non-zero value is provided
-  if eval_steps is None and eval_config.num_examples != 0:
-    eval_steps = eval_config.num_examples

   detection_model_fn = functools.partial(
       model_builder.build, model_config=model_config)
...
@@ -524,18 +561,25 @@ def create_estimator_and_inputs(run_config,
       train_config=train_config,
       train_input_config=train_input_config,
       model_config=model_config)
-  eval_input_fn = create_eval_input_fn(
-      eval_config=eval_config,
-      eval_input_config=eval_input_config,
-      model_config=model_config)
+  eval_input_fns = [
+      create_eval_input_fn(
+          eval_config=eval_config,
+          eval_input_config=eval_input_config,
+          model_config=model_config) for eval_input_config in eval_input_configs
+  ]
+  eval_input_names = [
+      eval_input_config.name for eval_input_config in eval_input_configs
+  ]
   eval_on_train_input_fn = create_eval_input_fn(
       eval_config=eval_config,
-      eval_input_config=train_input_config,
+      eval_input_config=eval_on_train_input_config,
      model_config=model_config)
   predict_input_fn = create_predict_input_fn(
-      model_config=model_config, predict_input_config=eval_input_config)
+      model_config=model_config, predict_input_config=eval_input_configs[0])

-  tf.logging.info('create_estimator_and_inputs: use_tpu %s', use_tpu)
+  export_to_tpu = hparams.get('export_to_tpu', False)
+  tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
+                  use_tpu, export_to_tpu)
   model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu)
   if use_tpu_estimator:
     estimator = tf.contrib.tpu.TPUEstimator(
...
@@ -552,89 +596,85 @@ def create_estimator_and_inputs(run_config,
   # Write the as-run pipeline config to disk.
   if run_config.is_chief:
     pipeline_config_final = create_pipeline_proto_from_configs(configs)
     config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir)

   return dict(
       estimator=estimator,
       train_input_fn=train_input_fn,
-      eval_input_fn=eval_input_fn,
+      eval_input_fns=eval_input_fns,
+      eval_input_names=eval_input_names,
       eval_on_train_input_fn=eval_on_train_input_fn,
       predict_input_fn=predict_input_fn,
-      train_steps=train_steps,
-      eval_steps=eval_steps)
+      train_steps=train_steps)


 def create_train_and_eval_specs(train_input_fn,
-                                eval_input_fn,
+                                eval_input_fns,
                                 eval_on_train_input_fn,
                                 predict_input_fn,
                                 train_steps,
-                                eval_steps,
                                 eval_on_train_data=False,
-                                eval_on_train_steps=None,
                                 final_exporter_name='Servo',
-                                eval_spec_name='eval'):
+                                eval_spec_names=None):
   """Creates a `TrainSpec` and `EvalSpec`s.

   Args:
     train_input_fn: Function that produces features and labels on train data.
-    eval_input_fn: Function that produces features and labels on eval data.
+    eval_input_fns: A list of functions that produce features and labels on eval
+      data.
     eval_on_train_input_fn: Function that produces features and labels for
       evaluation on train data.
     predict_input_fn: Function that produces features for inference.
     train_steps: Number of training steps.
-    eval_steps: Number of eval steps.
     eval_on_train_data: Whether to evaluate model on training data. Default is
       False.
-    eval_on_train_steps: Number of eval steps for training data. If not given,
-      uses eval_steps.
     final_exporter_name: String name given to `FinalExporter`.
-    eval_spec_name: String name given to main `EvalSpec`.
+    eval_spec_names: A list of string names for each `EvalSpec`.

   Returns:
-    Tuple of `TrainSpec` and list of `EvalSpecs`. The first `EvalSpec` is for
-    evaluation data. If `eval_on_train_data` is True, the second `EvalSpec` in
-    the list will correspond to training data.
+    Tuple of `TrainSpec` and list of `EvalSpecs`. If `eval_on_train_data` is
+    True, the last `EvalSpec` in the list will correspond to training data. The
+    rest EvalSpecs in the list are evaluation datas.
   """
-  exporter = tf.estimator.FinalExporter(
-      name=final_exporter_name, serving_input_receiver_fn=predict_input_fn)
   train_spec = tf.estimator.TrainSpec(
       input_fn=train_input_fn, max_steps=train_steps)

-  eval_specs = [
-      tf.estimator.EvalSpec(
-          name=eval_spec_name, input_fn=eval_input_fn,
-          steps=eval_steps,
-          exporters=exporter)
-  ]
+  if eval_spec_names is None:
+    eval_spec_names = range(len(eval_input_fns))
+
+  eval_specs = []
+  for eval_spec_name, eval_input_fn in zip(eval_spec_names, eval_input_fns):
+    exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
+    exporter = tf.estimator.FinalExporter(
+        name=exporter_name, serving_input_receiver_fn=predict_input_fn)
+    eval_specs.append(
+        tf.estimator.EvalSpec(
+            name=eval_spec_name,
+            input_fn=eval_input_fn,
+            steps=None,
+            exporters=exporter))

   if eval_on_train_data:
     eval_specs.append(
         tf.estimator.EvalSpec(
             name='eval_on_train', input_fn=eval_on_train_input_fn,
-            steps=eval_on_train_steps or eval_steps))
+            steps=None))

   return train_spec, eval_specs


-def continuous_eval(estimator, model_dir, input_fn, eval_steps, train_steps,
-                    name):
+def continuous_eval(estimator, model_dir, input_fn, train_steps, name):
   """Perform continuous evaluation on checkpoints written to a model directory.

   Args:
     estimator: Estimator object to use for evaluation.
     model_dir: Model directory to read checkpoints for continuous evaluation.
     input_fn: Input function to use for evaluation.
-    eval_steps: Number of steps to run during each evaluation.
     train_steps: Number of training steps. This is used to infer the last
       checkpoint and stop evaluation loop.
     name: Namescope for eval summary.
   """

   def terminate_eval():
     tf.logging.info('Terminating eval after 180 seconds of no checkpoints')
     return True
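Taken together, these hunks move the driver pattern from a single eval input to a list of them. A sketch of the new call sequence, assuming `train_and_eval_dict` came from the updated `create_estimator_and_inputs` above (flag values are illustrative):

    import tensorflow as tf
    from object_detection import model_lib

    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_and_eval_dict['train_input_fn'],
        train_and_eval_dict['eval_input_fns'],   # now a list of input functions
        train_and_eval_dict['eval_on_train_input_fn'],
        train_and_eval_dict['predict_input_fn'],
        train_and_eval_dict['train_steps'],
        eval_on_train_data=False,
        final_exporter_name='exporter',          # exporters become 'exporter_<name>'
        eval_spec_names=['holdout'])             # optional; defaults to indices
    # As model_main.py notes below, only a single EvalSpec is consumed today.
    tf.estimator.train_and_evaluate(
        train_and_eval_dict['estimator'], train_spec, eval_specs[0])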
...
@@ -646,10 +686,7 @@ def continuous_eval(estimator, model_dir, input_fn, eval_steps, train_steps,
       tf.logging.info('Starting Evaluation.')
       try:
         eval_results = estimator.evaluate(
-            input_fn=input_fn,
-            steps=eval_steps,
-            checkpoint_path=ckpt,
-            name=name)
+            input_fn=input_fn, steps=None, checkpoint_path=ckpt, name=name)
         tf.logging.info('Eval results: %s' % eval_results)

         # Terminate eval job when final checkpoint is reached
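Dropping `steps=eval_steps` in favor of `steps=None` is safe because `create_estimator_and_inputs` now forces `eval_num_epochs` to 1: with `steps=None`, `Estimator.evaluate` simply runs until the input function raises `tf.errors.OutOfRangeError`, which a single-epoch eval input guarantees after exactly one pass over the data. In sketch form:

    # With eval_num_epochs forced to 1 upstream, this evaluates one full epoch.
    eval_results = estimator.evaluate(
        input_fn=input_fn, steps=None, checkpoint_path=ckpt, name=name)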
...
@@ -713,10 +750,9 @@ def populate_experiment(run_config,
       **kwargs)
   estimator = train_and_eval_dict['estimator']
   train_input_fn = train_and_eval_dict['train_input_fn']
-  eval_input_fn = train_and_eval_dict['eval_input_fn']
+  eval_input_fns = train_and_eval_dict['eval_input_fns']
   predict_input_fn = train_and_eval_dict['predict_input_fn']
   train_steps = train_and_eval_dict['train_steps']
-  eval_steps = train_and_eval_dict['eval_steps']

   export_strategies = [
       tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
...
@@ -726,8 +762,9 @@ def populate_experiment(run_config,
   return tf.contrib.learn.Experiment(
       estimator=estimator,
       train_input_fn=train_input_fn,
-      eval_input_fn=eval_input_fn,
+      eval_input_fn=eval_input_fns[0],
       train_steps=train_steps,
-      eval_steps=eval_steps,
+      eval_steps=None,
       export_strategies=export_strategies,
-      eval_delay_secs=120,)
+      eval_delay_secs=120,
+  )
research/object_detection/model_lib_test.py  View file @ 27b4acd4
...
@@ -64,11 +64,13 @@ def _get_configs_for_model(model_name):
   data_path = _get_data_path()
   label_map_path = _get_labelmap_path()
   configs = config_util.get_configs_from_pipeline_file(filename)
+  override_dict = {
+      'train_input_path': data_path,
+      'eval_input_path': data_path,
+      'label_map_path': label_map_path
+  }
   configs = config_util.merge_external_params_with_configs(
-      configs,
-      train_input_path=data_path,
-      eval_input_path=data_path,
-      label_map_path=label_map_path)
+      configs, kwargs_dict=override_dict)
   return configs
...
@@ -145,6 +147,9 @@ class ModelLibTest(tf.test.TestCase):
     self.assertEqual(batch_size, detection_scores.shape.as_list()[0])
     self.assertEqual(tf.float32, detection_scores.dtype)
     self.assertEqual(tf.float32, num_detections.dtype)
+    if mode == 'eval':
+      self.assertIn('Detections_Left_Groundtruth_Right/0',
+                    estimator_spec.eval_metric_ops)
     if model_mode == tf.estimator.ModeKeys.TRAIN:
       self.assertIsNotNone(estimator_spec.train_op)
     return estimator_spec
...
@@ -225,21 +230,17 @@ class ModelLibTest(tf.test.TestCase):
         hparams_overrides='load_pretrained=false')
     pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
     train_steps = 20
-    eval_steps = 10
     train_and_eval_dict = model_lib.create_estimator_and_inputs(
         run_config,
         hparams,
         pipeline_config_path,
-        train_steps=train_steps,
-        eval_steps=eval_steps)
+        train_steps=train_steps)
     estimator = train_and_eval_dict['estimator']
     train_steps = train_and_eval_dict['train_steps']
-    eval_steps = train_and_eval_dict['eval_steps']
     self.assertIsInstance(estimator, tf.estimator.Estimator)
     self.assertEqual(20, train_steps)
-    self.assertEqual(10, eval_steps)
     self.assertIn('train_input_fn', train_and_eval_dict)
-    self.assertIn('eval_input_fn', train_and_eval_dict)
+    self.assertIn('eval_input_fns', train_and_eval_dict)
     self.assertIn('eval_on_train_input_fn', train_and_eval_dict)

   def test_create_estimator_with_default_train_eval_steps(self):
...
@@ -250,16 +251,13 @@ class ModelLibTest(tf.test.TestCase):
     pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
     configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
     config_train_steps = configs['train_config'].num_steps
-    config_eval_steps = configs['eval_config'].num_examples
     train_and_eval_dict = model_lib.create_estimator_and_inputs(
         run_config, hparams, pipeline_config_path)
     estimator = train_and_eval_dict['estimator']
     train_steps = train_and_eval_dict['train_steps']
-    eval_steps = train_and_eval_dict['eval_steps']
     self.assertIsInstance(estimator, tf.estimator.Estimator)
     self.assertEqual(config_train_steps, train_steps)
-    self.assertEqual(config_eval_steps, eval_steps)

   def test_create_tpu_estimator_and_inputs(self):
     """Tests that number of train/eval defaults to config values."""
...
@@ -269,21 +267,17 @@ class ModelLibTest(tf.test.TestCase):
         hparams_overrides='load_pretrained=false')
     pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
     train_steps = 20
-    eval_steps = 10
     train_and_eval_dict = model_lib.create_estimator_and_inputs(
         run_config,
         hparams,
         pipeline_config_path,
         train_steps=train_steps,
-        eval_steps=eval_steps,
         use_tpu_estimator=True)
     estimator = train_and_eval_dict['estimator']
     train_steps = train_and_eval_dict['train_steps']
-    eval_steps = train_and_eval_dict['eval_steps']
     self.assertIsInstance(estimator, tpu_estimator.TPUEstimator)
     self.assertEqual(20, train_steps)
-    self.assertEqual(10, eval_steps)

   def test_create_train_and_eval_specs(self):
     """Tests that `TrainSpec` and `EvalSpec` is created correctly."""
...
@@ -292,38 +286,32 @@ class ModelLibTest(tf.test.TestCase):
         hparams_overrides='load_pretrained=false')
     pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
     train_steps = 20
-    eval_steps = 10
-    eval_on_train_steps = 15
     train_and_eval_dict = model_lib.create_estimator_and_inputs(
         run_config,
         hparams,
         pipeline_config_path,
-        train_steps=train_steps,
-        eval_steps=eval_steps)
+        train_steps=train_steps)
     train_input_fn = train_and_eval_dict['train_input_fn']
-    eval_input_fn = train_and_eval_dict['eval_input_fn']
+    eval_input_fns = train_and_eval_dict['eval_input_fns']
     eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
     predict_input_fn = train_and_eval_dict['predict_input_fn']
     train_steps = train_and_eval_dict['train_steps']
-    eval_steps = train_and_eval_dict['eval_steps']
     train_spec, eval_specs = model_lib.create_train_and_eval_specs(
         train_input_fn,
-        eval_input_fn,
+        eval_input_fns,
         eval_on_train_input_fn,
         predict_input_fn,
         train_steps,
-        eval_steps,
         eval_on_train_data=True,
-        eval_on_train_steps=eval_on_train_steps,
         final_exporter_name='exporter',
-        eval_spec_name='holdout')
+        eval_spec_names=['holdout'])
     self.assertEqual(train_steps, train_spec.max_steps)
     self.assertEqual(2, len(eval_specs))
-    self.assertEqual(eval_steps, eval_specs[0].steps)
+    self.assertEqual(None, eval_specs[0].steps)
     self.assertEqual('holdout', eval_specs[0].name)
-    self.assertEqual('exporter', eval_specs[0].exporters[0].name)
-    self.assertEqual(eval_on_train_steps, eval_specs[1].steps)
+    self.assertEqual('exporter_holdout', eval_specs[0].exporters[0].name)
+    self.assertEqual(None, eval_specs[1].steps)
     self.assertEqual('eval_on_train', eval_specs[1].name)

   def test_experiment(self):
...
@@ -339,7 +327,7 @@ class ModelLibTest(tf.test.TestCase):
         train_steps=10,
         eval_steps=20)
     self.assertEqual(10, experiment.train_steps)
-    self.assertEqual(20, experiment.eval_steps)
+    self.assertEqual(None, experiment.eval_steps)


 class UnbatchTensorsTest(tf.test.TestCase):
...
research/object_detection/model_main.py  View file @ 27b4acd4
...
@@ -31,7 +31,16 @@ flags.DEFINE_string(
 flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
                     'file.')
 flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
-flags.DEFINE_integer('num_eval_steps', None, 'Number of train steps.')
+flags.DEFINE_boolean('eval_training_data', False,
+                     'If training data should be evaluated for this job. Note '
+                     'that one call only use this in eval-only mode, and '
+                     '`checkpoint_dir` must be supplied.')
+flags.DEFINE_integer('sample_1_of_n_eval_examples', 1, 'Will sample one of '
+                     'every n eval input examples, where n is provided.')
+flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
+                     'one of every n train input examples for evaluation, '
+                     'where n is provided. This is only used if '
+                     '`eval_training_data` is True.')
 flags.DEFINE_string(
     'hparams_overrides', None, 'Hyperparameter overrides, '
     'represented as a string containing comma-separated '
...
@@ -44,8 +53,6 @@ flags.DEFINE_boolean(
     'run_once', False, 'If running in eval-only mode, whether to run just '
     'one round of eval vs running continuously (default).'
 )
-flags.DEFINE_boolean('eval_training_data', False,
-                     'If training data should be evaluated for this job.')
 FLAGS = flags.FLAGS
...
@@ -59,14 +66,15 @@ def main(unused_argv):
       hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
       pipeline_config_path=FLAGS.pipeline_config_path,
       train_steps=FLAGS.num_train_steps,
-      eval_steps=FLAGS.num_eval_steps)
+      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
+      sample_1_of_n_eval_on_train_examples=(
+          FLAGS.sample_1_of_n_eval_on_train_examples))
   estimator = train_and_eval_dict['estimator']
   train_input_fn = train_and_eval_dict['train_input_fn']
-  eval_input_fn = train_and_eval_dict['eval_input_fn']
+  eval_input_fns = train_and_eval_dict['eval_input_fns']
   eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
   predict_input_fn = train_and_eval_dict['predict_input_fn']
   train_steps = train_and_eval_dict['train_steps']
-  eval_steps = train_and_eval_dict['eval_steps']

   if FLAGS.checkpoint_dir:
     if FLAGS.eval_training_data:
...
@@ -74,23 +82,23 @@ def main(unused_argv):
       input_fn = eval_on_train_input_fn
     else:
       name = 'validation_data'
-      input_fn = eval_input_fn
+      # The first eval input will be evaluated.
+      input_fn = eval_input_fns[0]
     if FLAGS.run_once:
       estimator.evaluate(input_fn,
-                         eval_steps,
+                         num_eval_steps=None,
                          checkpoint_path=tf.train.latest_checkpoint(
                              FLAGS.checkpoint_dir))
     else:
-      model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn,
-                                eval_steps, train_steps, name)
+      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
+                                train_steps, name)
   else:
     train_spec, eval_specs = model_lib.create_train_and_eval_specs(
         train_input_fn,
-        eval_input_fn,
+        eval_input_fns,
         eval_on_train_input_fn,
         predict_input_fn,
         train_steps,
-        eval_steps,
         eval_on_train_data=False)

     # Currently only a single Eval Spec is allowed.
...
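One caveat worth flagging in the `run_once` branch above: `tf.estimator.Estimator.evaluate` has no `num_eval_steps` keyword (its parameter is named `steps`), so the call as committed raises a TypeError when `--run_once` is set. A corrected sketch of that call:

    estimator.evaluate(input_fn,
                       steps=None,
                       checkpoint_path=tf.train.latest_checkpoint(
                           FLAGS.checkpoint_dir))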
research/object_detection/model_tpu_main.py  View file @ 27b4acd4
...
@@ -62,15 +62,20 @@ flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
 flags.DEFINE_string(
     'hparams_overrides', None, 'Comma-separated list of '
     'hyperparameters to override defaults.')
+flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
 flags.DEFINE_boolean('eval_training_data', False,
                      'If training data should be evaluated for this job.')
+flags.DEFINE_integer('sample_1_of_n_eval_examples', 1, 'Will sample one of '
+                     'every n eval input examples, where n is provided.')
+flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
+                     'one of every n train input examples for evaluation, '
+                     'where n is provided. This is only used if '
+                     '`eval_training_data` is True.')
 flags.DEFINE_string(
     'model_dir', None, 'Path to output model directory '
     'where event and checkpoint files will be written.')
 flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
                     'file.')
-flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
-flags.DEFINE_integer('num_eval_steps', None, 'Number of train steps.')

 FLAGS = tf.flags.FLAGS
...
@@ -103,17 +108,18 @@ def main(unused_argv):
       hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
       pipeline_config_path=FLAGS.pipeline_config_path,
       train_steps=FLAGS.num_train_steps,
-      eval_steps=FLAGS.num_eval_steps,
+      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
+      sample_1_of_n_eval_on_train_examples=(
+          FLAGS.sample_1_of_n_eval_on_train_examples),
       use_tpu_estimator=True,
       use_tpu=FLAGS.use_tpu,
       num_shards=FLAGS.num_shards,
       **kwargs)
   estimator = train_and_eval_dict['estimator']
   train_input_fn = train_and_eval_dict['train_input_fn']
-  eval_input_fn = train_and_eval_dict['eval_input_fn']
+  eval_input_fns = train_and_eval_dict['eval_input_fns']
   eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
   train_steps = train_and_eval_dict['train_steps']
-  eval_steps = train_and_eval_dict['eval_steps']

   if FLAGS.mode == 'train':
     estimator.train(input_fn=train_input_fn, max_steps=train_steps)
...
@@ -125,9 +131,10 @@ def main(unused_argv):
       input_fn = eval_on_train_input_fn
     else:
       name = 'validation_data'
-      input_fn = eval_input_fn
+      # Currently only a single eval input is allowed.
+      input_fn = eval_input_fns[0]

-    model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn, eval_steps,
-                              train_steps, name)
+    model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn,
+                              train_steps, name)

 if __name__ == '__main__':
...
research/object_detection/models/feature_map_generators.py
View file @
27b4acd4
...
@@ -24,6 +24,7 @@ Feature map generators build on the base feature extractors and produce a list
...
@@ -24,6 +24,7 @@ Feature map generators build on the base feature extractors and produce a list
of final feature maps.
of final feature maps.
"""
"""
import
collections
import
collections
import
functools
import
tensorflow
as
tf
import
tensorflow
as
tf
from
object_detection.utils
import
ops
from
object_detection.utils
import
ops
slim
=
tf
.
contrib
.
slim
slim
=
tf
.
contrib
.
slim
...
@@ -45,6 +46,222 @@ def get_depth_fn(depth_multiplier, min_depth):
...
@@ -45,6 +46,222 @@ def get_depth_fn(depth_multiplier, min_depth):
return
multiply_depth
return
multiply_depth
class
KerasMultiResolutionFeatureMaps
(
tf
.
keras
.
Model
):
"""Generates multi resolution feature maps from input image features.
A Keras model that generates multi-scale feature maps for detection as in the
SSD papers by Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1.
More specifically, when called on inputs it performs the following two tasks:
1) If a layer name is provided in the configuration, returns that layer as a
feature map.
2) If a layer name is left as an empty string, constructs a new feature map
based on the spatial shape and depth configuration. Note that the current
implementation only supports generating new layers using convolution of
stride 2 resulting in a spatial resolution reduction by a factor of 2.
By default convolution kernel size is set to 3, and it can be customized
by caller.
An example of the configuration for Inception V3:
{
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128]
}
When this feature generator object is called on input image_features:
Args:
image_features: A dictionary of handles to activation tensors from the
base feature extractor.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
"""
def
__init__
(
self
,
feature_map_layout
,
depth_multiplier
,
min_depth
,
insert_1x1_conv
,
is_training
,
conv_hyperparams
,
freeze_batchnorm
,
name
=
None
):
"""Constructor.
Args:
feature_map_layout: Dictionary of specifications for the feature map
layouts in the following format (Inception V2/V3 respectively):
{
'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128]
}
or
{
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128]
}
If 'from_layer' is specified, the specified feature map is directly used
as a box predictor layer, and the layer_depth is directly infered from
the feature map (instead of using the provided 'layer_depth' parameter).
In this case, our convention is to set 'layer_depth' to -1 for clarity.
Otherwise, if 'from_layer' is an empty string, then the box predictor
layer will be built from the previous layer using convolution
operations. Note that the current implementation only supports
generating new layers using convolutions of stride 2 (resulting in a
spatial resolution reduction by a factor of 2), and will be extended to
a more flexible design. Convolution kernel size is set to 3 by default,
and can be customized by 'conv_kernel_size' parameter (similarily,
'conv_kernel_size' should be set to -1 if 'from_layer' is specified).
The created convolution operation will be a normal 2D convolution by
default, and a depthwise convolution followed by 1x1 convolution if
'use_depthwise' is set to True.
depth_multiplier: Depth multiplier for convolutional layers.
min_depth: Minimum depth for convolutional layers.
insert_1x1_conv: A boolean indicating whether an additional 1x1
convolution should be inserted before shrinking the feature map.
is_training: Indicates whether the feature generator is in training mode.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops.
freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
super
(
KerasMultiResolutionFeatureMaps
,
self
).
__init__
(
name
=
name
)
self
.
feature_map_layout
=
feature_map_layout
self
.
convolutions
=
[]
depth_fn
=
get_depth_fn
(
depth_multiplier
,
min_depth
)
base_from_layer
=
''
use_explicit_padding
=
False
if
'use_explicit_padding'
in
feature_map_layout
:
use_explicit_padding
=
feature_map_layout
[
'use_explicit_padding'
]
use_depthwise
=
False
if
'use_depthwise'
in
feature_map_layout
:
use_depthwise
=
feature_map_layout
[
'use_depthwise'
]
for
index
,
from_layer
in
enumerate
(
feature_map_layout
[
'from_layer'
]):
net
=
[]
self
.
convolutions
.
append
(
net
)
layer_depth
=
feature_map_layout
[
'layer_depth'
][
index
]
conv_kernel_size
=
3
if
'conv_kernel_size'
in
feature_map_layout
:
conv_kernel_size
=
feature_map_layout
[
'conv_kernel_size'
][
index
]
if
from_layer
:
base_from_layer
=
from_layer
else
:
if
insert_1x1_conv
:
layer_name
=
'{}_1_Conv2d_{}_1x1_{}'
.
format
(
base_from_layer
,
index
,
depth_fn
(
layer_depth
/
2
))
net
.
append
(
tf
.
keras
.
layers
.
Conv2D
(
depth_fn
(
layer_depth
/
2
),
[
1
,
1
],
padding
=
'SAME'
,
strides
=
1
,
name
=
layer_name
+
'_conv'
,
**
conv_hyperparams
.
params
()))
net
.
append
(
conv_hyperparams
.
build_batch_norm
(
training
=
(
is_training
and
not
freeze_batchnorm
),
name
=
layer_name
+
'_batchnorm'
))
net
.
append
(
conv_hyperparams
.
build_activation_layer
(
name
=
layer_name
))
layer_name
=
'{}_2_Conv2d_{}_{}x{}_s2_{}'
.
format
(
base_from_layer
,
index
,
conv_kernel_size
,
conv_kernel_size
,
depth_fn
(
layer_depth
))
stride
=
2
padding
=
'SAME'
if
use_explicit_padding
:
padding
=
'VALID'
# We define this function here while capturing the value of
# conv_kernel_size, to avoid holding a reference to the loop variable
# conv_kernel_size inside of a lambda function
def
fixed_padding
(
features
,
kernel_size
=
conv_kernel_size
):
return
ops
.
fixed_padding
(
features
,
kernel_size
)
net
.
append
(
tf
.
keras
.
layers
.
Lambda
(
fixed_padding
))
# TODO(rathodv): Add some utilities to simplify the creation of
# Depthwise & non-depthwise convolutions w/ normalization & activations
if
use_depthwise
:
net
.
append
(
tf
.
keras
.
layers
.
DepthwiseConv2D
(
[
conv_kernel_size
,
conv_kernel_size
],
depth_multiplier
=
1
,
padding
=
padding
,
strides
=
stride
,
name
=
layer_name
+
'_depthwise_conv'
,
**
conv_hyperparams
.
params
()))
net
.
append
(
conv_hyperparams
.
build_batch_norm
(
training
=
(
is_training
and
not
freeze_batchnorm
),
name
=
layer_name
+
'_depthwise_batchnorm'
))
net
.
append
(
conv_hyperparams
.
build_activation_layer
(
name
=
layer_name
+
'_depthwise'
))
net
.
append
(
tf
.
keras
.
layers
.
Conv2D
(
depth_fn
(
layer_depth
),
[
1
,
1
],
padding
=
'SAME'
,
strides
=
1
,
name
=
layer_name
+
'_conv'
,
**
conv_hyperparams
.
params
()))
net
.
append
(
conv_hyperparams
.
build_batch_norm
(
training
=
(
is_training
and
not
freeze_batchnorm
),
name
=
layer_name
+
'_batchnorm'
))
net
.
append
(
conv_hyperparams
.
build_activation_layer
(
name
=
layer_name
))
else
:
net
.
append
(
tf
.
keras
.
layers
.
Conv2D
(
depth_fn
(
layer_depth
),
[
conv_kernel_size
,
conv_kernel_size
],
padding
=
padding
,
strides
=
stride
,
name
=
layer_name
+
'_conv'
,
**
conv_hyperparams
.
params
()))
net
.
append
(
conv_hyperparams
.
build_batch_norm
(
training
=
(
is_training
and
not
freeze_batchnorm
),
name
=
layer_name
+
'_batchnorm'
))
net
.
append
(
conv_hyperparams
.
build_activation_layer
(
name
=
layer_name
))
  def call(self, image_features):
    """Generate the multi-resolution feature maps.

    Executed when calling the `.__call__` method on input.

    Args:
      image_features: A dictionary of handles to activation tensors from the
        base feature extractor.

    Returns:
      feature_maps: an OrderedDict mapping keys (feature map names) to
        tensors where each tensor has shape [batch, height_i, width_i,
        depth_i].
    """
    feature_maps = []
    feature_map_keys = []

    for index, from_layer in enumerate(self.feature_map_layout['from_layer']):
      if from_layer:
        feature_map = image_features[from_layer]
        feature_map_keys.append(from_layer)
      else:
        feature_map = feature_maps[-1]
        for layer in self.convolutions[index]:
          feature_map = layer(feature_map)
        layer_name = self.convolutions[index][-1].name
        feature_map_keys.append(layer_name)
      feature_maps.append(feature_map)
    return collections.OrderedDict(
        [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
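For orientation, here is a minimal sketch of how this Keras generator is driven. It mirrors the `_build_feature_map_generator` helper in the test file later in this commit; the layout dict and input tensors are illustrative assumptions, not part of the diff, and `conv_hyperparams` is assumed to be a `hyperparams_builder.KerasLayerHyperparams` instance.

# Hypothetical usage sketch.
feature_map_layout = {
    'from_layer': ['Mixed_4c', 'Mixed_5c', '', ''],
    'layer_depth': [-1, -1, 512, 256],
}
generator = KerasMultiResolutionFeatureMaps(
    feature_map_layout=feature_map_layout,
    depth_multiplier=1,
    min_depth=32,
    insert_1x1_conv=True,
    is_training=True,
    conv_hyperparams=conv_hyperparams,
    freeze_batchnorm=False,
    name='FeatureMaps')
feature_maps = generator(image_features)  # OrderedDict keyed by layer name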
def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
                                  min_depth, insert_1x1_conv, image_features):
  """Generates multi resolution feature maps from input image features.
...
@@ -77,7 +294,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
        }
      or
        {
          'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
          'layer_depth': [-1, -1, -1, 512, 256, 128]
        }
      If 'from_layer' is specified, the specified feature map is directly used
...
@@ -179,7 +396,10 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
      [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
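As a concrete (hypothetical) call mirroring the docstring's layout example; `image_features` is assumed to map layer names to 4-D tensors from the base network, and the output key shown in the comment follows the naming scheme used by the tests in this commit.

# Sketch only; the input feature maps are assumed to exist.
layout = {
    'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
    'layer_depth': [-1, -1, -1, 512, 256, 128],
}
maps = multi_resolution_feature_maps(
    feature_map_layout=layout,
    depth_multiplier=1.0,
    min_depth=16,
    insert_1x1_conv=True,
    image_features=image_features)
# Returns an OrderedDict: the three backbone maps followed by three new
# stride-2 maps with names like 'Mixed_7c_2_Conv2d_3_3x3_s2_512'.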
def fpn_top_down_feature_maps(image_features, depth, use_depthwise=False,
                              scope=None):
  """Generates `top-down` feature maps for Feature Pyramid Networks.

  See https://arxiv.org/abs/1612.03144 for details.
...
@@ -189,6 +409,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
      Spatial resolutions of successive tensors must reduce exactly by a factor
      of 2.
    depth: depth of output feature maps.
    use_depthwise: use depthwise separable conv instead of regular conv.
    scope: A scope name to wrap this op under.

  Returns:
...
@@ -200,7 +421,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
    output_feature_maps_list = []
    output_feature_map_keys = []
    with slim.arg_scope(
        [slim.conv2d, slim.separable_conv2d], padding='SAME', stride=1):
      top_down = slim.conv2d(
          image_features[-1][1],
          depth, [1, 1], activation_fn=None, normalizer_fn=None,
...
@@ -216,7 +437,11 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
            activation_fn=None, normalizer_fn=None,
            scope='projection_%d' % (level + 1))
        top_down += residual
        if use_depthwise:
          conv_op = functools.partial(slim.separable_conv2d,
                                      depth_multiplier=1)
        else:
          conv_op = slim.conv2d
        output_feature_maps_list.append(conv_op(
            top_down,
            depth, [3, 3],
            scope='smoothing_%d' % (level + 1)))
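A short sketch of the new `use_depthwise` option, modeled on the FPN unit test added later in this commit (the block names and shapes are taken from that test):

image_features = [
    ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
    ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
    ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
    ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32)),
]
fpn_maps = fpn_top_down_feature_maps(
    image_features=image_features, depth=128, use_depthwise=True)
# Keys come back as 'top_down_block2' ... 'top_down_block5', each with
# depth 128; the 3x3 smoothing convs become depthwise separable convs.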
...
@@ -226,7 +451,7 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
                                 image_features, replace_pool_with_conv=False):
  """Generates pooling pyramid feature maps.

  The pooling pyramid feature maps are motivated by
...
@@ -250,6 +475,8 @@ def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
      from the base feature.
    image_features: A dictionary of handles to activation tensors from the
      feature extractor.
    replace_pool_with_conv: Whether or not to replace pooling operations with
      convolutions in the PPN. Default is False.

  Returns:
    feature_maps: an OrderedDict mapping keys (feature map names) to
...
@@ -279,12 +506,22 @@ def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
  feature_map_keys.append(feature_map_key)
  feature_maps.append(image_features)
  feature_map = image_features
  if replace_pool_with_conv:
    with slim.arg_scope([slim.conv2d], padding='SAME', stride=2):
      for i in range(num_layers - 1):
        feature_map_key = 'Conv2d_{}_3x3_s2_{}'.format(i,
                                                       base_feature_map_depth)
        feature_map = slim.conv2d(
            feature_map, base_feature_map_depth, [3, 3],
            scope=feature_map_key)
        feature_map_keys.append(feature_map_key)
        feature_maps.append(feature_map)
  else:
    with slim.arg_scope([slim.max_pool2d], padding='SAME', stride=2):
      for i in range(num_layers - 1):
        feature_map_key = 'MaxPool2d_%d_2x2' % i
        feature_map = slim.max_pool2d(
            feature_map, [2, 2], padding='SAME', scope=feature_map_key)
        feature_map_keys.append(feature_map_key)
        feature_maps.append(feature_map)
  return collections.OrderedDict(
      [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
research/object_detection/models/feature_map_generators_test.py View file @ 27b4acd4
...
@@ -15,9 +15,15 @@
"""Tests for feature map generators."""

from absl.testing import parameterized

import tensorflow as tf

from google.protobuf import text_format

from object_detection.builders import hyperparams_builder
from object_detection.models import feature_map_generators
from object_detection.protos import hyperparams_pb2

INCEPTION_V2_LAYOUT = {
    'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
...
@@ -40,21 +46,60 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
}


# TODO(rathodv): add tests with different anchor strides.
@parameterized.parameters(
    {'use_keras': False},
    {'use_keras': True},
)
class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):

  def _build_conv_hyperparams(self):
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    conv_hyperparams_text_proto = """
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
    """
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)

  def _build_feature_map_generator(self, feature_map_layout, use_keras):
    if use_keras:
      return feature_map_generators.KerasMultiResolutionFeatureMaps(
          feature_map_layout=feature_map_layout,
          depth_multiplier=1,
          min_depth=32,
          insert_1x1_conv=True,
          freeze_batchnorm=False,
          is_training=True,
          conv_hyperparams=self._build_conv_hyperparams(),
          name='FeatureMaps'
      )
    else:
      def feature_map_generator(image_features):
        return feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=feature_map_layout,
            depth_multiplier=1,
            min_depth=32,
            insert_1x1_conv=True,
            image_features=image_features)
      return feature_map_generator
  def test_get_expected_feature_map_shapes_with_inception_v2(self, use_keras):
    image_features = {
        'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
        'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
        'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
    }
    feature_map_generator = self._build_feature_map_generator(
        feature_map_layout=INCEPTION_V2_LAYOUT,
        use_keras=use_keras
    )
    feature_maps = feature_map_generator(image_features)

    expected_feature_map_shapes = {
        'Mixed_3c': (4, 28, 28, 256),
...
@@ -70,21 +115,53 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
      out_feature_maps = sess.run(feature_maps)
      out_feature_map_shapes = dict(
          (key, value.shape) for key, value in out_feature_maps.items())
      self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)

  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
  def test_get_expected_feature_map_shapes_use_explicit_padding(
      self, use_keras):
    image_features = {
        'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
        'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
        'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
    }
    layout_copy = INCEPTION_V2_LAYOUT.copy()
    layout_copy['use_explicit_padding'] = True
    feature_map_generator = self._build_feature_map_generator(
        feature_map_layout=layout_copy,
        use_keras=use_keras
    )
    feature_maps = feature_map_generator(image_features)

    expected_feature_map_shapes = {
        'Mixed_3c': (4, 28, 28, 256),
        'Mixed_4c': (4, 14, 14, 576),
        'Mixed_5c': (4, 7, 7, 1024),
        'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
        'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
        'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)}

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      out_feature_maps = sess.run(feature_maps)
      out_feature_map_shapes = dict(
          (key, value.shape) for key, value in out_feature_maps.items())
      self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
  def test_get_expected_feature_map_shapes_with_inception_v3(self, use_keras):
    image_features = {
        'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
        'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
        'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
    }
    feature_map_generator = self._build_feature_map_generator(
        feature_map_layout=INCEPTION_V3_LAYOUT,
        use_keras=use_keras
    )
    feature_maps = feature_map_generator(image_features)

    expected_feature_map_shapes = {
        'Mixed_5d': (4, 35, 35, 256),
...
@@ -100,10 +177,10 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
      out_feature_maps = sess.run(feature_maps)
      out_feature_map_shapes = dict(
          (key, value.shape) for key, value in out_feature_maps.items())
      self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)

  def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
      self, use_keras):
    image_features = {
        'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512],
                                                 dtype=tf.float32),
...
@@ -111,12 +188,11 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
                                                 dtype=tf.float32),
    }
    feature_map_generator = self._build_feature_map_generator(
        feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
        use_keras=use_keras
    )
    feature_maps = feature_map_generator(image_features)

    expected_feature_map_shapes = {
        'Conv2d_11_pointwise': (4, 16, 16, 512),
...
@@ -131,7 +207,62 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
      out_feature_maps = sess.run(feature_maps)
      out_feature_map_shapes = dict(
          (key, value.shape) for key, value in out_feature_maps.items())
      self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)

  def test_get_expected_variable_names_with_inception_v2(self, use_keras):
    image_features = {
        'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
        'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
        'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
    }
    feature_map_generator = self._build_feature_map_generator(
        feature_map_layout=INCEPTION_V2_LAYOUT,
        use_keras=use_keras
    )
    feature_maps = feature_map_generator(image_features)

    expected_slim_variables = set([
        'Mixed_5c_1_Conv2d_3_1x1_256/weights',
        'Mixed_5c_1_Conv2d_3_1x1_256/biases',
        'Mixed_5c_2_Conv2d_3_3x3_s2_512/weights',
        'Mixed_5c_2_Conv2d_3_3x3_s2_512/biases',
        'Mixed_5c_1_Conv2d_4_1x1_128/weights',
        'Mixed_5c_1_Conv2d_4_1x1_128/biases',
        'Mixed_5c_2_Conv2d_4_3x3_s2_256/weights',
        'Mixed_5c_2_Conv2d_4_3x3_s2_256/biases',
        'Mixed_5c_1_Conv2d_5_1x1_128/weights',
        'Mixed_5c_1_Conv2d_5_1x1_128/biases',
        'Mixed_5c_2_Conv2d_5_3x3_s2_256/weights',
        'Mixed_5c_2_Conv2d_5_3x3_s2_256/biases',
    ])

    expected_keras_variables = set([
        'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
        'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
        'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
        'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
        'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
        'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
        'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
        'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
        'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
        'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
        'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
        'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
    ])

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      sess.run(feature_maps)
      actual_variable_set = set(
          [var.op.name for var in tf.trainable_variables()])
      if use_keras:
        self.assertSetEqual(expected_keras_variables, actual_variable_set)
      else:
        self.assertSetEqual(expected_slim_variables, actual_variable_set)
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10

class FPNFeatureMapGeneratorTest(tf.test.TestCase):
...
@@ -161,6 +292,31 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase):
                                for key, value in out_feature_maps.items()}
      self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)

  def test_get_expected_feature_map_shapes_with_depthwise(self):
    image_features = [
        ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
        ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
        ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
        ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
    ]
    feature_maps = feature_map_generators.fpn_top_down_feature_maps(
        image_features=image_features, depth=128, use_depthwise=True)

    expected_feature_map_shapes = {
        'top_down_block2': (4, 8, 8, 128),
        'top_down_block3': (4, 4, 4, 128),
        'top_down_block4': (4, 2, 2, 128),
        'top_down_block5': (4, 1, 1, 128)
    }

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      out_feature_maps = sess.run(feature_maps)
      out_feature_map_shapes = {key: value.shape
                                for key, value in out_feature_maps.items()}
      self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)


class GetDepthFunctionTest(tf.test.TestCase):
...
@@ -175,5 +331,94 @@ class GetDepthFunctionTest(tf.test.TestCase):
    self.assertEqual(depth_fn(64), 32)
@parameterized.parameters(
    {'replace_pool_with_conv': False},
    {'replace_pool_with_conv': True},
)
class PoolingPyramidFeatureMapGeneratorTest(tf.test.TestCase):

  def test_get_expected_feature_map_shapes(self, replace_pool_with_conv):
    image_features = {
        'image_features': tf.random_uniform([4, 19, 19, 1024])
    }
    feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
        base_feature_map_depth=1024,
        num_layers=6,
        image_features=image_features,
        replace_pool_with_conv=replace_pool_with_conv)

    expected_pool_feature_map_shapes = {
        'Base_Conv2d_1x1_1024': (4, 19, 19, 1024),
        'MaxPool2d_0_2x2': (4, 10, 10, 1024),
        'MaxPool2d_1_2x2': (4, 5, 5, 1024),
        'MaxPool2d_2_2x2': (4, 3, 3, 1024),
        'MaxPool2d_3_2x2': (4, 2, 2, 1024),
        'MaxPool2d_4_2x2': (4, 1, 1, 1024),
    }

    expected_conv_feature_map_shapes = {
        'Base_Conv2d_1x1_1024': (4, 19, 19, 1024),
        'Conv2d_0_3x3_s2_1024': (4, 10, 10, 1024),
        'Conv2d_1_3x3_s2_1024': (4, 5, 5, 1024),
        'Conv2d_2_3x3_s2_1024': (4, 3, 3, 1024),
        'Conv2d_3_3x3_s2_1024': (4, 2, 2, 1024),
        'Conv2d_4_3x3_s2_1024': (4, 1, 1, 1024),
    }

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      out_feature_maps = sess.run(feature_maps)
      out_feature_map_shapes = {key: value.shape
                                for key, value in out_feature_maps.items()}
      if replace_pool_with_conv:
        self.assertDictEqual(expected_conv_feature_map_shapes,
                             out_feature_map_shapes)
      else:
        self.assertDictEqual(expected_pool_feature_map_shapes,
                             out_feature_map_shapes)

  def test_get_expected_variable_names(self, replace_pool_with_conv):
    image_features = {
        'image_features': tf.random_uniform([4, 19, 19, 1024])
    }
    feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
        base_feature_map_depth=1024,
        num_layers=6,
        image_features=image_features,
        replace_pool_with_conv=replace_pool_with_conv)

    expected_pool_variables = set([
        'Base_Conv2d_1x1_1024/weights',
        'Base_Conv2d_1x1_1024/biases',
    ])

    expected_conv_variables = set([
        'Base_Conv2d_1x1_1024/weights',
        'Base_Conv2d_1x1_1024/biases',
        'Conv2d_0_3x3_s2_1024/weights',
        'Conv2d_0_3x3_s2_1024/biases',
        'Conv2d_1_3x3_s2_1024/weights',
        'Conv2d_1_3x3_s2_1024/biases',
        'Conv2d_2_3x3_s2_1024/weights',
        'Conv2d_2_3x3_s2_1024/biases',
        'Conv2d_3_3x3_s2_1024/weights',
        'Conv2d_3_3x3_s2_1024/biases',
        'Conv2d_4_3x3_s2_1024/weights',
        'Conv2d_4_3x3_s2_1024/biases',
    ])

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      sess.run(feature_maps)
      actual_variable_set = set(
          [var.op.name for var in tf.trainable_variables()])
      if replace_pool_with_conv:
        self.assertSetEqual(expected_conv_variables, actual_variable_set)
      else:
        self.assertSetEqual(expected_pool_variables, actual_variable_set)


if __name__ == '__main__':
  tf.test.main()
research/object_detection/models/keras_applications/__init__.py 0 → 100644 View file @ 27b4acd4
research/object_detection/models/keras_applications/mobilenet_v2.py 0 → 100644 View file @ 27b4acd4
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A wrapper around the MobileNet v2 models for Keras, for object detection."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from object_detection.core import freezable_batch_norm
from object_detection.utils import ops

# pylint: disable=invalid-name
# This method copied from the slim mobilenet base network code (same license)
def _make_divisible(v, divisor, min_value=None):
  if min_value is None:
    min_value = divisor
  new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
  # Make sure that round down does not go down by more than 10%.
  if new_v < 0.9 * v:
    new_v += divisor
  return new_v
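A quick worked check of the rounding rule above (values chosen for illustration):

# Rounds to the nearest multiple of `divisor`, then bumps up one multiple
# if rounding lost more than 10% of the original value.
assert _make_divisible(24, 8) == 24   # already a multiple of 8
assert _make_divisible(30, 8) == 32   # nearest multiple of 8
assert _make_divisible(23, 16) == 32  # 16 would drop >10%, so bump to 32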
class _LayersOverride(object):
  """Alternative Keras layers interface for the Keras MobileNetV2."""

  def __init__(self,
               batchnorm_training,
               default_batchnorm_momentum=0.999,
               conv_hyperparams=None,
               use_explicit_padding=False,
               alpha=1.0,
               min_depth=None):
    """Alternative tf.keras.layers interface, for use by the Keras MobileNetV2.

    It is used by the Keras applications kwargs injection API to
    modify the Mobilenet v2 Keras application with changes required by
    the Object Detection API.

    These injected interfaces make the following changes to the network:

    - Applies the Object Detection hyperparameter configuration
    - Supports FreezableBatchNorms
    - Adds support for a min number of filters for each layer
    - Makes the `alpha` parameter affect the final convolution block even if it
        is less than 1.0
    - Adds support for explicit padding of convolutions

    Args:
      batchnorm_training: Bool. Assigned to Batch norm layer `training` param
        when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
      default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
        batch norm layers will be constructed using this value as the momentum.
      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
        containing hyperparameters for convolution ops. Optionally set to
        `None` to use default mobilenet_v2 layer builders.
      use_explicit_padding: If True, use 'valid' padding for convolutions,
        but explicitly pre-pads inputs so that the output dimensions are the
        same as if 'same' padding were used. Off by default.
      alpha: The width multiplier referenced in the MobileNetV2 paper. It
        modifies the number of filters in each convolutional layer.
      min_depth: Minimum number of filters in the convolutional layers.
    """
    self._alpha = alpha
    self._batchnorm_training = batchnorm_training
    self._default_batchnorm_momentum = default_batchnorm_momentum
    self._conv_hyperparams = conv_hyperparams
    self._use_explicit_padding = use_explicit_padding
    self._min_depth = min_depth
  def _FixedPaddingLayer(self, kernel_size):
    return tf.keras.layers.Lambda(lambda x: ops.fixed_padding(x, kernel_size))
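`ops.fixed_padding` itself is defined elsewhere in the codebase; as a rough sketch of what this style of explicit padding computes (a common formulation, assumed here rather than copied from `ops.py`):

def fixed_padding_sketch(inputs, kernel_size):
  # Pad height/width by a total of (kernel_size - 1), split as evenly as
  # possible, so a following 'valid' conv matches 'same' output sizes.
  pad_total = kernel_size - 1
  pad_beg = pad_total // 2
  pad_end = pad_total - pad_beg
  return tf.pad(inputs,
                [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])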
  def Conv2D(self, filters, **kwargs):
    """Builds a Conv2D layer according to the current Object Detection config.

    Overrides the Keras MobileNetV2 application's convolutions with ones that
    follow the spec specified by the Object Detection hyperparameters.

    Args:
      filters: The number of filters to use for the convolution.
      **kwargs: Keyword args specified by the Keras application for
        constructing the convolution.

    Returns:
      A one-arg callable that will either directly apply a Keras Conv2D layer
      to the input argument, or that will first pad the input then apply a
      Conv2D layer.
    """
    # Make sure 'alpha' is always applied to the last convolution block's size
    # (This overrides the Keras application's functionality)
    if kwargs.get('name') == 'Conv_1' and self._alpha < 1.0:
      filters = _make_divisible(1280 * self._alpha, 8)

    # Apply the minimum depth to the convolution layers
    if (self._min_depth and (filters < self._min_depth)
        and not kwargs.get('name').endswith('expand')):
      filters = self._min_depth

    if self._conv_hyperparams:
      kwargs = self._conv_hyperparams.params(**kwargs)
    kwargs['padding'] = 'same'
    kernel_size = kwargs.get('kernel_size')
    if self._use_explicit_padding and kernel_size > 1:
      kwargs['padding'] = 'valid'
      def padded_conv(features):
        padded_features = self._FixedPaddingLayer(kernel_size)(features)
        return tf.keras.layers.Conv2D(filters, **kwargs)(padded_features)
      return padded_conv
    else:
      return tf.keras.layers.Conv2D(filters, **kwargs)
  def DepthwiseConv2D(self, **kwargs):
    """Builds a DepthwiseConv2D according to the Object Detection config.

    Overrides the Keras MobileNetV2 application's convolutions with ones that
    follow the spec specified by the Object Detection hyperparameters.

    Args:
      **kwargs: Keyword args specified by the Keras application for
        constructing the convolution.

    Returns:
      A one-arg callable that will either directly apply a Keras
      DepthwiseConv2D layer to the input argument, or that will first pad the
      input then apply the depthwise convolution.
    """
    if self._conv_hyperparams:
      kwargs = self._conv_hyperparams.params(**kwargs)
    kwargs['padding'] = 'same'
    kernel_size = kwargs.get('kernel_size')
    if self._use_explicit_padding and kernel_size > 1:
      kwargs['padding'] = 'valid'
      def padded_depthwise_conv(features):
        padded_features = self._FixedPaddingLayer(kernel_size)(features)
        return tf.keras.layers.DepthwiseConv2D(**kwargs)(padded_features)
      return padded_depthwise_conv
    else:
      return tf.keras.layers.DepthwiseConv2D(**kwargs)

  def BatchNormalization(self, **kwargs):
    """Builds a normalization layer.

    Overrides the Keras application batch norm with the norm specified by the
    Object Detection configuration.

    Args:
      **kwargs: Only the name is used, all other params ignored.
        Required for matching `layers.BatchNormalization` calls in the Keras
        application.

    Returns:
      A normalization layer specified by the Object Detection hyperparameter
      configurations.
    """
    name = kwargs.get('name')
    if self._conv_hyperparams:
      return self._conv_hyperparams.build_batch_norm(
          training=self._batchnorm_training,
          name=name)
    else:
      return freezable_batch_norm.FreezableBatchNorm(
          training=self._batchnorm_training,
          epsilon=1e-3,
          momentum=self._default_batchnorm_momentum,
          name=name)
  def Input(self, shape):
    """Builds an Input layer.

    Overrides the Keras application Input layer with one that uses a
    tf.placeholder_with_default instead of a tf.placeholder. This is necessary
    to ensure the application works when run on a TPU.

    Args:
      shape: The shape for the input layer to use. (Does not include a
        dimension for the batch size).

    Returns:
      An input layer for the specified shape that internally uses a
      placeholder_with_default.
    """
    default_size = 224
    default_batch_size = 1
    shape = list(shape)
    default_shape = [default_size if dim is None else dim for dim in shape]

    input_tensor = tf.constant(0.0, shape=[default_batch_size] + default_shape)

    placeholder_with_default = tf.placeholder_with_default(
        input=input_tensor, shape=[None] + shape)
    return tf.keras.layers.Input(tensor=placeholder_with_default)
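A tiny sketch of the `placeholder_with_default` trick used here: the graph carries a concrete default tensor (so TPU tracing sees static shapes) while still accepting fed inputs whose elided dimensions are unknown. Shapes below are illustrative.

default = tf.constant(0.0, shape=[1, 224, 224, 3])
inp = tf.placeholder_with_default(input=default, shape=[None, 224, 224, 3])
# Running without a feed uses `default`; feeding `inp` overrides it.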
  # pylint: disable=unused-argument
  def ReLU(self, *args, **kwargs):
    """Builds an activation layer.

    Overrides the Keras application ReLU with the activation specified by the
    Object Detection configuration.

    Args:
      *args: Ignored, required to match the `tf.keras.ReLU` interface
      **kwargs: Only the name is used,
        required to match `tf.keras.ReLU` interface

    Returns:
      An activation layer specified by the Object Detection hyperparameter
      configurations.
    """
    name = kwargs.get('name')
    if self._conv_hyperparams:
      return self._conv_hyperparams.build_activation_layer(name=name)
    else:
      return tf.keras.layers.Lambda(tf.nn.relu6, name=name)
  # pylint: enable=unused-argument

  # pylint: disable=unused-argument
  def ZeroPadding2D(self, **kwargs):
    """Replaces explicit padding in the Keras application with a no-op.

    Args:
      **kwargs: Ignored, required to match the Keras applications usage.

    Returns:
      A no-op identity lambda.
    """
    return lambda x: x
  # pylint: enable=unused-argument

  # Forward all non-overridden methods to the keras layers
  def __getattr__(self, item):
    return getattr(tf.keras.layers, item)
def mobilenet_v2(batchnorm_training,
                 default_batchnorm_momentum=0.9997,
                 conv_hyperparams=None,
                 use_explicit_padding=False,
                 alpha=1.0,
                 min_depth=None,
                 **kwargs):
  """Instantiates the MobileNetV2 architecture, modified for object detection.

  This wraps the MobileNetV2 tensorflow Keras application, but uses the
  Keras application's kwargs-based monkey-patching API to override the Keras
  architecture with the following changes:

  - Changes the default batchnorm momentum to 0.9997
  - Applies the Object Detection hyperparameter configuration
  - Supports FreezableBatchNorms
  - Adds support for a min number of filters for each layer
  - Makes the `alpha` parameter affect the final convolution block even if it
      is less than 1.0
  - Adds support for explicit padding of convolutions
  - Makes the Input layer use a tf.placeholder_with_default instead of a
      tf.placeholder, to work on TPUs.

  Args:
    batchnorm_training: Bool. Assigned to Batch norm layer `training` param
      when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
    default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
      batch norm layers will be constructed using this value as the momentum.
    conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
      containing hyperparameters for convolution ops. Optionally set to `None`
      to use default mobilenet_v2 layer builders.
    use_explicit_padding: If True, use 'valid' padding for convolutions,
      but explicitly pre-pads inputs so that the output dimensions are the
      same as if 'same' padding were used. Off by default.
    alpha: The width multiplier referenced in the MobileNetV2 paper. It
      modifies the number of filters in each convolutional layer.
    min_depth: Minimum number of filters in the convolutional layers.
    **kwargs: Keyword arguments forwarded directly to the
      `tf.keras.applications.MobilenetV2` method that constructs the Keras
      model.

  Returns:
    A Keras model instance.
  """
  layers_override = _LayersOverride(
      batchnorm_training,
      default_batchnorm_momentum=default_batchnorm_momentum,
      conv_hyperparams=conv_hyperparams,
      use_explicit_padding=use_explicit_padding,
      min_depth=min_depth,
      alpha=alpha)
  return tf.keras.applications.MobileNetV2(alpha=alpha,
                                           layers=layers_override,
                                           **kwargs)
# pylint: enable=invalid-name
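A minimal usage sketch, following the unit tests below; the input tensor name is an assumption (any [N, H, W, 3] float batch works), and the output-shape comment matches the expected shapes in those tests for alpha=1.0.

model = mobilenet_v2(
    batchnorm_training=False,
    conv_hyperparams=None,   # or a KerasLayerHyperparams instance
    weights=None,            # forwarded to tf.keras.applications.MobileNetV2
    include_top=False,
    alpha=1.0,
    min_depth=32)
features = model(preprocessed_images)  # final map, e.g. [N, H/32, W/32, 1280]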
research/object_detection/models/keras_applications/mobilenet_v2_test.py 0 → 100644 View file @ 27b4acd4
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for mobilenet_v2."""
import itertools

import numpy as np
import tensorflow as tf

from google.protobuf import text_format

from object_detection.builders import hyperparams_builder
from object_detection.models.keras_applications import mobilenet_v2
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case

_layers_to_check = [
    'Conv1_relu',
    'block_1_expand_relu', 'block_1_depthwise_relu', 'block_1_project_BN',
    'block_2_expand_relu', 'block_2_depthwise_relu', 'block_2_project_BN',
    'block_3_expand_relu', 'block_3_depthwise_relu', 'block_3_project_BN',
    'block_4_expand_relu', 'block_4_depthwise_relu', 'block_4_project_BN',
    'block_5_expand_relu', 'block_5_depthwise_relu', 'block_5_project_BN',
    'block_6_expand_relu', 'block_6_depthwise_relu', 'block_6_project_BN',
    'block_7_expand_relu', 'block_7_depthwise_relu', 'block_7_project_BN',
    'block_8_expand_relu', 'block_8_depthwise_relu', 'block_8_project_BN',
    'block_9_expand_relu', 'block_9_depthwise_relu', 'block_9_project_BN',
    'block_10_expand_relu', 'block_10_depthwise_relu', 'block_10_project_BN',
    'block_11_expand_relu', 'block_11_depthwise_relu', 'block_11_project_BN',
    'block_12_expand_relu', 'block_12_depthwise_relu', 'block_12_project_BN',
    'block_13_expand_relu', 'block_13_depthwise_relu', 'block_13_project_BN',
    'block_14_expand_relu', 'block_14_depthwise_relu', 'block_14_project_BN',
    'block_15_expand_relu', 'block_15_depthwise_relu', 'block_15_project_BN',
    'block_16_expand_relu', 'block_16_depthwise_relu', 'block_16_project_BN',
    'out_relu']
class MobilenetV2Test(test_case.TestCase):

  def _build_conv_hyperparams(self):
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    conv_hyperparams_text_proto = """
      activation: RELU_6
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
      batch_norm {
        train: true,
        scale: false,
        center: true,
        decay: 0.2,
        epsilon: 0.1,
      }
    """
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)

  def _create_application_with_layer_outputs(
      self, layer_names, batchnorm_training,
      conv_hyperparams=None,
      use_explicit_padding=False,
      alpha=1.0,
      min_depth=None):
    """Constructs Keras mobilenetv2 that extracts intermediate layer outputs."""
    if not layer_names:
      layer_names = _layers_to_check
    full_model = mobilenet_v2.mobilenet_v2(
        batchnorm_training=batchnorm_training,
        conv_hyperparams=conv_hyperparams,
        weights=None,
        use_explicit_padding=use_explicit_padding,
        alpha=alpha,
        min_depth=min_depth,
        include_top=False)
    layer_outputs = [full_model.get_layer(name=layer).output
                     for layer in layer_names]
    return tf.keras.Model(
        inputs=full_model.inputs,
        outputs=layer_outputs)

  def _check_returns_correct_shape(
      self, batch_size, image_height, image_width, depth_multiplier,
      expected_feature_map_shapes, use_explicit_padding=False, min_depth=None,
      layer_names=None):
    def graph_fn(image_tensor):
      model = self._create_application_with_layer_outputs(
          layer_names=layer_names,
          batchnorm_training=False,
          use_explicit_padding=use_explicit_padding,
          min_depth=min_depth,
          alpha=depth_multiplier)
      return model(image_tensor)

    image_tensor = np.random.rand(batch_size, image_height, image_width,
                                  3).astype(np.float32)
    feature_maps = self.execute(graph_fn, [image_tensor])

    for feature_map, expected_shape in itertools.izip(
        feature_maps, expected_feature_map_shapes):
      self.assertAllEqual(feature_map.shape, expected_shape)

  def _check_returns_correct_shapes_with_dynamic_inputs(
      self, batch_size, image_height, image_width, depth_multiplier,
      expected_feature_map_shapes, use_explicit_padding=False,
      layer_names=None):
    def graph_fn(image_height, image_width):
      image_tensor = tf.random_uniform([batch_size, image_height, image_width,
                                        3], dtype=tf.float32)
      model = self._create_application_with_layer_outputs(
          layer_names=layer_names,
          batchnorm_training=False,
          use_explicit_padding=use_explicit_padding,
          alpha=depth_multiplier)
      return model(image_tensor)

    feature_maps = self.execute_cpu(graph_fn, [
        np.array(image_height, dtype=np.int32),
        np.array(image_width, dtype=np.int32)
    ])

    for feature_map, expected_shape in itertools.izip(
        feature_maps, expected_feature_map_shapes):
      self.assertAllEqual(feature_map.shape, expected_shape)

  def _get_variables(self, depth_multiplier, layer_names=None):
    g = tf.Graph()
    with g.as_default():
      preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
      model = self._create_application_with_layer_outputs(
          layer_names=layer_names,
          batchnorm_training=False, use_explicit_padding=False,
          alpha=depth_multiplier)
      model(preprocessed_inputs)
      return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
  def test_returns_correct_shapes_128(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1.0
    expected_feature_map_shape = [
        (2, 64, 64, 32), (2, 64, 64, 96), (2, 32, 32, 96), (2, 32, 32, 24),
        (2, 32, 32, 144), (2, 32, 32, 144), (2, 32, 32, 24), (2, 32, 32, 144),
        (2, 16, 16, 144), (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192),
        (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192), (2, 16, 16, 32),
        (2, 16, 16, 192), (2, 8, 8, 192), (2, 8, 8, 64), (2, 8, 8, 384),
        (2, 8, 8, 384), (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384),
        (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 64),
        (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 96), (2, 8, 8, 576),
        (2, 8, 8, 576), (2, 8, 8, 96), (2, 8, 8, 576), (2, 8, 8, 576),
        (2, 8, 8, 96), (2, 8, 8, 576), (2, 4, 4, 576), (2, 4, 4, 160),
        (2, 4, 4, 960), (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960),
        (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960), (2, 4, 4, 960),
        (2, 4, 4, 320), (2, 4, 4, 1280)]

    self._check_returns_correct_shape(
        2, image_height, image_width, depth_multiplier,
        expected_feature_map_shape)
  def test_returns_correct_shapes_128_explicit_padding(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1.0
    expected_feature_map_shape = [
        (2, 64, 64, 32), (2, 64, 64, 96), (2, 32, 32, 96), (2, 32, 32, 24),
        (2, 32, 32, 144), (2, 32, 32, 144), (2, 32, 32, 24), (2, 32, 32, 144),
        (2, 16, 16, 144), (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192),
        (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192), (2, 16, 16, 32),
        (2, 16, 16, 192), (2, 8, 8, 192), (2, 8, 8, 64), (2, 8, 8, 384),
        (2, 8, 8, 384), (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384),
        (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 64),
        (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 96), (2, 8, 8, 576),
        (2, 8, 8, 576), (2, 8, 8, 96), (2, 8, 8, 576), (2, 8, 8, 576),
        (2, 8, 8, 96), (2, 8, 8, 576), (2, 4, 4, 576), (2, 4, 4, 160),
        (2, 4, 4, 960), (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960),
        (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960), (2, 4, 4, 960),
        (2, 4, 4, 320), (2, 4, 4, 1280)]

    self._check_returns_correct_shape(
        2, image_height, image_width, depth_multiplier,
        expected_feature_map_shape, use_explicit_padding=True)
  def test_returns_correct_shapes_with_dynamic_inputs(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1.0
    expected_feature_map_shape = [
        (2, 64, 64, 32), (2, 64, 64, 96), (2, 32, 32, 96), (2, 32, 32, 24),
        (2, 32, 32, 144), (2, 32, 32, 144), (2, 32, 32, 24), (2, 32, 32, 144),
        (2, 16, 16, 144), (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192),
        (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192), (2, 16, 16, 32),
        (2, 16, 16, 192), (2, 8, 8, 192), (2, 8, 8, 64), (2, 8, 8, 384),
        (2, 8, 8, 384), (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384),
        (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 64),
        (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 96), (2, 8, 8, 576),
        (2, 8, 8, 576), (2, 8, 8, 96), (2, 8, 8, 576), (2, 8, 8, 576),
        (2, 8, 8, 96), (2, 8, 8, 576), (2, 4, 4, 576), (2, 4, 4, 160),
        (2, 4, 4, 960), (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960),
        (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960), (2, 4, 4, 960),
        (2, 4, 4, 320), (2, 4, 4, 1280)]

    self._check_returns_correct_shapes_with_dynamic_inputs(
        2, image_height, image_width, depth_multiplier,
        expected_feature_map_shape)
  def test_returns_correct_shapes_299(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 1.0
    expected_feature_map_shape = [
        (2, 150, 150, 32), (2, 150, 150, 96), (2, 75, 75, 96), (2, 75, 75, 24),
        (2, 75, 75, 144), (2, 75, 75, 144), (2, 75, 75, 24), (2, 75, 75, 144),
        (2, 38, 38, 144), (2, 38, 38, 32), (2, 38, 38, 192), (2, 38, 38, 192),
        (2, 38, 38, 32), (2, 38, 38, 192), (2, 38, 38, 192), (2, 38, 38, 32),
        (2, 38, 38, 192), (2, 19, 19, 192), (2, 19, 19, 64), (2, 19, 19, 384),
        (2, 19, 19, 384), (2, 19, 19, 64), (2, 19, 19, 384), (2, 19, 19, 384),
        (2, 19, 19, 64), (2, 19, 19, 384), (2, 19, 19, 384), (2, 19, 19, 64),
        (2, 19, 19, 384), (2, 19, 19, 384), (2, 19, 19, 96), (2, 19, 19, 576),
        (2, 19, 19, 576), (2, 19, 19, 96), (2, 19, 19, 576), (2, 19, 19, 576),
        (2, 19, 19, 96), (2, 19, 19, 576), (2, 10, 10, 576), (2, 10, 10, 160),
        (2, 10, 10, 960), (2, 10, 10, 960), (2, 10, 10, 160), (2, 10, 10, 960),
        (2, 10, 10, 960), (2, 10, 10, 160), (2, 10, 10, 960), (2, 10, 10, 960),
        (2, 10, 10, 320), (2, 10, 10, 1280)]

    self._check_returns_correct_shape(
        2, image_height, image_width, depth_multiplier,
        expected_feature_map_shape)
  def test_returns_correct_shapes_enforcing_min_depth(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 0.5**12
    expected_feature_map_shape = [
        (2, 150, 150, 32), (2, 150, 150, 192), (2, 75, 75, 192),
        (2, 75, 75, 32), (2, 75, 75, 192), (2, 75, 75, 192), (2, 75, 75, 32),
        (2, 75, 75, 192), (2, 38, 38, 192), (2, 38, 38, 32), (2, 38, 38, 192),
        (2, 38, 38, 192), (2, 38, 38, 32), (2, 38, 38, 192), (2, 38, 38, 192),
        (2, 38, 38, 32), (2, 38, 38, 192), (2, 19, 19, 192), (2, 19, 19, 32),
        (2, 19, 19, 192), (2, 19, 19, 192), (2, 19, 19, 32), (2, 19, 19, 192),
        (2, 19, 19, 192), (2, 19, 19, 32), (2, 19, 19, 192), (2, 19, 19, 192),
        (2, 19, 19, 32), (2, 19, 19, 192), (2, 19, 19, 192), (2, 19, 19, 32),
        (2, 19, 19, 192), (2, 19, 19, 192), (2, 19, 19, 32), (2, 19, 19, 192),
        (2, 19, 19, 192), (2, 19, 19, 32), (2, 19, 19, 192), (2, 10, 10, 192),
        (2, 10, 10, 32), (2, 10, 10, 192), (2, 10, 10, 192), (2, 10, 10, 32),
        (2, 10, 10, 192), (2, 10, 10, 192), (2, 10, 10, 32), (2, 10, 10, 192),
        (2, 10, 10, 192), (2, 10, 10, 32), (2, 10, 10, 32)]

    self._check_returns_correct_shape(
        2, image_height, image_width, depth_multiplier,
        expected_feature_map_shape, min_depth=32)
  def test_hyperparam_override(self):
    hyperparams = self._build_conv_hyperparams()
    model = mobilenet_v2.mobilenet_v2(
        batchnorm_training=True,
        conv_hyperparams=hyperparams,
        weights=None,
        use_explicit_padding=False,
        alpha=1.0,
        min_depth=32,
        include_top=False)
    hyperparams.params()
    bn_layer = model.get_layer(name='block_5_project_BN')
    self.assertAllClose(bn_layer.momentum, 0.2)
    self.assertAllClose(bn_layer.epsilon, 0.1)

  def test_variable_count(self):
    depth_multiplier = 1
    variables = self._get_variables(depth_multiplier)
    self.assertEqual(len(variables), 260)


if __name__ == '__main__':
  tf.test.main()
research/object_detection/models/ssd_feature_extractor_test.py View file @ 27b4acd4
...
@@ -21,18 +21,40 @@ import itertools
import numpy as np
import tensorflow as tf

from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case


class SsdFeatureExtractorTestBase(test_case.TestCase):

  def _build_conv_hyperparams(self):
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    conv_hyperparams_text_proto = """
      activation: RELU_6
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
      batch_norm {
        scale: false
      }
    """
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)

  def conv_hyperparams_fn(self):
    with tf.contrib.slim.arg_scope([]) as sc:
      return sc

  @abstractmethod
  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                use_explicit_padding=False, use_keras=False):
    """Constructs a new feature extractor.

    Args:
...
@@ -42,20 +64,42 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
      use_explicit_padding: use 'VALID' padding for convolutions, but prepad
        inputs so that the output dimensions are the same as if 'SAME' padding
        were used.
      use_keras: if True builds a keras-based feature extractor, if False
        builds a slim-based one.
    Returns:
      an ssd_meta_arch.SSDFeatureExtractor or an
      ssd_meta_arch.SSDKerasFeatureExtractor object.
    """
    pass
  def _extract_features(self, image_tensor, depth_multiplier, pad_to_multiple,
                        use_explicit_padding=False, use_keras=False):
    try:
      feature_extractor = self._create_feature_extractor(
          depth_multiplier, pad_to_multiple, use_explicit_padding,
          use_keras=use_keras)
    # If the unit test does not support a use_keras arg, it raises an error:
    except TypeError:
      feature_extractor = self._create_feature_extractor(
          depth_multiplier, pad_to_multiple, use_explicit_padding)
    if use_keras:
      feature_maps = feature_extractor(image_tensor)
    else:
      feature_maps = feature_extractor.extract_features(image_tensor)
    return feature_maps

  def check_extract_features_returns_correct_shape(
      self, batch_size, image_height, image_width, depth_multiplier,
      pad_to_multiple, expected_feature_map_shapes,
      use_explicit_padding=False, use_keras=False):
    def graph_fn(image_tensor):
      return self._extract_features(image_tensor,
                                    depth_multiplier,
                                    pad_to_multiple,
                                    use_explicit_padding,
                                    use_keras=use_keras)

    image_tensor = np.random.rand(batch_size, image_height, image_width,
                                  3).astype(np.float32)
...
@@ -66,15 +110,16 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
  def check_extract_features_returns_correct_shapes_with_dynamic_inputs(
      self, batch_size, image_height, image_width, depth_multiplier,
      pad_to_multiple, expected_feature_map_shapes,
      use_explicit_padding=False, use_keras=False):

    def graph_fn(image_height, image_width):
      image_tensor = tf.random_uniform([batch_size, image_height, image_width,
                                        3], dtype=tf.float32)
      return self._extract_features(image_tensor,
                                    depth_multiplier,
                                    pad_to_multiple,
                                    use_explicit_padding,
                                    use_keras=use_keras)

    feature_maps = self.execute_cpu(graph_fn, [
        np.array(image_height, dtype=np.int32),
...
@@ -85,11 +130,13 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
      self.assertAllEqual(feature_map.shape, expected_shape)
  def check_extract_features_raises_error_with_invalid_image_size(
      self, image_height, image_width, depth_multiplier, pad_to_multiple,
      use_keras=False):
    preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
    feature_maps = self._extract_features(preprocessed_inputs,
                                          depth_multiplier,
                                          pad_to_multiple,
                                          use_keras=use_keras)
    test_preprocessed_image = np.random.rand(4, image_height, image_width, 3)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
...
@@ -98,13 +145,19 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
          feed_dict={preprocessed_inputs: test_preprocessed_image})

  def check_feature_extractor_variables_under_scope(
      self, depth_multiplier, pad_to_multiple, scope_name, use_keras=False):
    variables = self.get_feature_extractor_variables(
        depth_multiplier, pad_to_multiple, use_keras)
    for variable in variables:
      self.assertTrue(variable.name.startswith(scope_name))

  def get_feature_extractor_variables(self, depth_multiplier, pad_to_multiple,
                                      use_keras=False):
    g = tf.Graph()
    with g.as_default():
      preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
      self._extract_features(preprocessed_inputs,
                             depth_multiplier,
                             pad_to_multiple,
                             use_keras=use_keras)
      return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor.py View file @ 27b4acd4
@@ -15,6 +15,8 @@
 """SSD MobilenetV1 FPN Feature Extractor."""

+import copy
+import functools
 import tensorflow as tf

 from object_detection.meta_architectures import ssd_meta_arch
@@ -27,6 +29,15 @@ from nets import mobilenet_v1
 slim = tf.contrib.slim

+# A modified config of mobilenet v1 that makes it more detection friendly.
+def _create_modified_mobilenet_config():
+  conv_defs = copy.copy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
+  conv_defs[-2] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=512)
+  conv_defs[-1] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=256)
+  return conv_defs
+
+_CONV_DEFS = _create_modified_mobilenet_config()
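The config helper above follows a copy-then-patch pattern: copy.copy duplicates only the outer list, which is safe because the entries are immutable namedtuples. A self-contained sketch of the same pattern (DepthSepConv and BASE_CONV_DEFS below are stand-ins, not the nets.mobilenet_v1 definitions):

    import collections
    import copy

    # Stand-in for mobilenet_v1.DepthSepConv and MOBILENETV1_CONV_DEFS.
    DepthSepConv = collections.namedtuple('DepthSepConv',
                                          ['kernel', 'stride', 'depth'])
    BASE_CONV_DEFS = [
        DepthSepConv(kernel=[3, 3], stride=2, depth=1024),
        DepthSepConv(kernel=[3, 3], stride=1, depth=1024),
    ]

    # A shallow copy is enough: replacing list slots never mutates the shared
    # namedtuple entries, so BASE_CONV_DEFS itself stays untouched.
    conv_defs = copy.copy(BASE_CONV_DEFS)
    conv_defs[-2] = DepthSepConv(kernel=[3, 3], stride=2, depth=512)
    conv_defs[-1] = DepthSepConv(kernel=[3, 3], stride=1, depth=256)
    print(conv_defs)        # the modified copy
    print(BASE_CONV_DEFS)   # the original, unchanged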
 class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
   """SSD Feature Extractor using MobilenetV1 FPN features."""
@@ -38,6 +49,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
                conv_hyperparams_fn,
                fpn_min_level=3,
                fpn_max_level=7,
+               additional_layer_depth=256,
                reuse_weights=None,
                use_explicit_padding=False,
                use_depthwise=False,
@@ -63,6 +75,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
         maps in the backbone network, additional feature maps are created by
         applying stride 2 convolutions until we get the desired number of fpn
         levels.
+      additional_layer_depth: additional feature map layer channel depth.
       reuse_weights: whether to reuse variables. Default is None.
       use_explicit_padding: Whether to use explicit padding when extracting
         features. Default is False.
@@ -84,6 +97,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
         override_base_feature_extractor_hyperparams)
     self._fpn_min_level = fpn_min_level
     self._fpn_max_level = fpn_max_level
+    self._additional_layer_depth = additional_layer_depth

   def preprocess(self, resized_inputs):
     """SSD preprocessing.
@@ -127,6 +141,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           final_endpoint='Conv2d_13_pointwise',
           min_depth=self._min_depth,
           depth_multiplier=self._depth_multiplier,
+          conv_defs=_CONV_DEFS if self._use_depthwise else None,
           use_explicit_padding=self._use_explicit_padding,
           scope=scope)
@@ -143,7 +158,8 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
         feature_block_list.append(feature_blocks[level - 2])
       fpn_features = feature_map_generators.fpn_top_down_feature_maps(
           [(key, image_features[key]) for key in feature_block_list],
-          depth=depth_fn(256))
+          depth=depth_fn(self._additional_layer_depth),
+          use_depthwise=self._use_depthwise)
       feature_maps = []
       for level in range(self._fpn_min_level, base_fpn_max_level + 1):
         feature_maps.append(fpn_features['top_down_{}'.format(
@@ -152,9 +168,14 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           feature_blocks[base_fpn_max_level - 2])]
       # Construct coarse features
       for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
-        last_feature_map = slim.conv2d(
+        if self._use_depthwise:
+          conv_op = functools.partial(slim.separable_conv2d,
+                                      depth_multiplier=1)
+        else:
+          conv_op = slim.conv2d
+        last_feature_map = conv_op(
             last_feature_map,
-            num_outputs=depth_fn(256),
+            num_outputs=depth_fn(self._additional_layer_depth),
             kernel_size=[3, 3],
             stride=2,
             padding='SAME',
...
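The coarse-feature loop above swaps the convolution op rather than branching at every call site: functools.partial pre-binds depth_multiplier=1 so conv_op accepts the same arguments either way. A runnable sketch of the pattern with stand-in functions (the fake_* names below are illustrations, not the slim APIs):

    import functools

    def fake_conv2d(inputs, num_outputs, kernel_size, stride, padding):
      return ('conv2d', num_outputs, stride)

    def fake_separable_conv2d(inputs, num_outputs, kernel_size, stride,
                              padding, depth_multiplier):
      return ('separable_conv2d', num_outputs, stride, depth_multiplier)

    def make_conv_op(use_depthwise):
      # partial() fixes depth_multiplier up front, so both branches return a
      # callable with an identical signature.
      if use_depthwise:
        return functools.partial(fake_separable_conv2d, depth_multiplier=1)
      return fake_conv2d

    conv_op = make_conv_op(use_depthwise=True)
    print(conv_op('features', num_outputs=256, kernel_size=[3, 3], stride=2,
                  padding='SAME'))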
research/object_detection/models/ssd_mobilenet_v2_feature_extractor_test.py
View file @
27b4acd4
@@ -14,20 +14,27 @@
 # ==============================================================================

 """Tests for ssd_mobilenet_v2_feature_extractor."""
+from absl.testing import parameterized
+
 import numpy as np
 import tensorflow as tf

 from object_detection.models import ssd_feature_extractor_test
 from object_detection.models import ssd_mobilenet_v2_feature_extractor
+from object_detection.models import ssd_mobilenet_v2_keras_feature_extractor

 slim = tf.contrib.slim


+@parameterized.parameters(
+    {'use_keras': False},
+    {'use_keras': True},
+)
 class SsdMobilenetV2FeatureExtractorTest(
     ssd_feature_extractor_test.SsdFeatureExtractorTestBase):

-  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
-                                use_explicit_padding=False):
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+                                use_explicit_padding=False, use_keras=False):
     """Constructs a new feature extractor.

     Args:
@@ -37,19 +44,47 @@ class SsdMobilenetV2FeatureExtractorTest(
       use_explicit_padding: use 'VALID' padding for convolutions, but prepad
         inputs so that the output dimensions are the same as if 'SAME' padding
         were used.
+      use_keras: if True builds a keras-based feature extractor, if False builds
+        a slim-based one.

     Returns:
       an ssd_meta_arch.SSDFeatureExtractor object.
     """
     min_depth = 32
-    return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor(
-        False,
-        depth_multiplier,
-        min_depth,
-        pad_to_multiple,
-        self.conv_hyperparams_fn,
-        use_explicit_padding=use_explicit_padding)
+    if use_keras:
+      return (ssd_mobilenet_v2_keras_feature_extractor.
+              SSDMobileNetV2KerasFeatureExtractor(
+                  is_training=False,
+                  depth_multiplier=depth_multiplier,
+                  min_depth=min_depth,
+                  pad_to_multiple=pad_to_multiple,
+                  conv_hyperparams=self._build_conv_hyperparams(),
+                  freeze_batchnorm=False,
+                  inplace_batchnorm_update=False,
+                  use_explicit_padding=use_explicit_padding,
+                  name='MobilenetV2'))
+    else:
+      return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor(
+          False,
+          depth_multiplier,
+          min_depth,
+          pad_to_multiple,
+          self.conv_hyperparams_fn,
+          use_explicit_padding=use_explicit_padding)

-  def test_extract_features_returns_correct_shapes_128(self):
+  def test_extract_features_returns_correct_shapes_128(self, use_keras):
+    image_height = 128
+    image_width = 128
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280),
+                                  (2, 2, 2, 512), (2, 1, 1, 256),
+                                  (2, 1, 1, 256), (2, 1, 1, 128)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_keras=use_keras)
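The class-level @parameterized.parameters decorator clones every test_* method once per parameter dict, passing the dict entries as keyword arguments, which is why each test in this file now takes use_keras. A minimal standalone sketch of the mechanism (the test names below are illustrative):

    from absl.testing import absltest
    from absl.testing import parameterized

    @parameterized.parameters(
        {'use_keras': False},
        {'use_keras': True},
    )
    class FlavorTest(parameterized.TestCase):

      # Runs twice: once with use_keras=False and once with use_keras=True.
      def test_receives_flag(self, use_keras):
        self.assertIsInstance(use_keras, bool)

    if __name__ == '__main__':
      absltest.main()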
+  def test_extract_features_returns_correct_shapes_128_explicit_padding(
+      self, use_keras):
     image_height = 128
     image_width = 128
     depth_multiplier = 1.0
@@ -59,9 +94,11 @@ class SsdMobilenetV2FeatureExtractorTest(
                                   (2, 1, 1, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape)
+        expected_feature_map_shape, use_explicit_padding=True,
+        use_keras=use_keras)

-  def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
+  def test_extract_features_returns_correct_shapes_with_dynamic_inputs(
+      self, use_keras):
     image_height = 128
     image_width = 128
     depth_multiplier = 1.0
@@ -71,9 +108,9 @@ class SsdMobilenetV2FeatureExtractorTest(
                                   (2, 1, 1, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape)
+        expected_feature_map_shape, use_keras=use_keras)

-  def test_extract_features_returns_correct_shapes_299(self):
+  def test_extract_features_returns_correct_shapes_299(self, use_keras):
     image_height = 299
     image_width = 299
     depth_multiplier = 1.0
@@ -83,9 +120,10 @@ class SsdMobilenetV2FeatureExtractorTest(
                                   (2, 2, 2, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape)
+        expected_feature_map_shape, use_keras=use_keras)

-  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
+  def test_extract_features_returns_correct_shapes_enforcing_min_depth(
+      self, use_keras):
     image_height = 299
     image_width = 299
     depth_multiplier = 0.5**12
@@ -95,9 +133,10 @@ class SsdMobilenetV2FeatureExtractorTest(
                                   (2, 2, 2, 32), (2, 1, 1, 32)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape)
+        expected_feature_map_shape, use_keras=use_keras)

-  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
+  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
+      self, use_keras):
     image_height = 299
     image_width = 299
     depth_multiplier = 1.0
@@ -107,35 +146,45 @@ class SsdMobilenetV2FeatureExtractorTest(
                                   (2, 2, 2, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape)
+        expected_feature_map_shape, use_keras=use_keras)

-  def test_extract_features_raises_error_with_invalid_image_size(self):
+  def test_extract_features_raises_error_with_invalid_image_size(
+      self, use_keras):
     image_height = 32
     image_width = 32
     depth_multiplier = 1.0
     pad_to_multiple = 1
     self.check_extract_features_raises_error_with_invalid_image_size(
-        image_height, image_width, depth_multiplier, pad_to_multiple)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        use_keras=use_keras)

-  def test_preprocess_returns_correct_value_range(self):
+  def test_preprocess_returns_correct_value_range(self, use_keras):
     image_height = 128
     image_width = 128
     depth_multiplier = 1
     pad_to_multiple = 1
     test_image = np.random.rand(4, image_height, image_width, 3)
     feature_extractor = self._create_feature_extractor(depth_multiplier,
-                                                       pad_to_multiple)
+                                                       pad_to_multiple,
+                                                       use_keras=use_keras)
     preprocessed_image = feature_extractor.preprocess(test_image)
     self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))

-  def test_variables_only_created_in_scope(self):
+  def test_variables_only_created_in_scope(self, use_keras):
     depth_multiplier = 1
     pad_to_multiple = 1
     scope_name = 'MobilenetV2'
     self.check_feature_extractor_variables_under_scope(
-        depth_multiplier, pad_to_multiple, scope_name)
+        depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras)
+
+  def test_variable_count(self, use_keras):
+    depth_multiplier = 1
+    pad_to_multiple = 1
+    variables = self.get_feature_extractor_variables(
+        depth_multiplier, pad_to_multiple, use_keras=use_keras)
+    self.assertEqual(len(variables), 292)

-  def test_has_fused_batchnorm(self):
+  def test_has_fused_batchnorm(self, use_keras):
     image_height = 40
     image_width = 40
     depth_multiplier = 1
@@ -143,9 +192,13 @@ class SsdMobilenetV2FeatureExtractorTest(
     image_placeholder = tf.placeholder(tf.float32,
                                        [1, image_height, image_width, 3])
     feature_extractor = self._create_feature_extractor(depth_multiplier,
-                                                       pad_to_multiple)
+                                                       pad_to_multiple,
+                                                       use_keras=use_keras)
     preprocessed_image = feature_extractor.preprocess(image_placeholder)
-    _ = feature_extractor.extract_features(preprocessed_image)
+    if use_keras:
+      _ = feature_extractor(preprocessed_image)
+    else:
+      _ = feature_extractor.extract_features(preprocessed_image)
     self.assertTrue(any(op.type == 'FusedBatchNorm'
                         for op in tf.get_default_graph().get_operations()))
...
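The test_has_fused_batchnorm change highlights the API split this commit threads through the tests: Keras-based extractors build their graph via __call__, while slim-based ones keep an explicit extract_features() method. A minimal sketch of that duality, using stand-in classes rather than the repo's extractors:

    import tensorflow as tf

    class KerasStyleExtractor(tf.keras.layers.Layer):

      def call(self, inputs):
        # A Layer builds its ops when invoked as extractor(inputs).
        return [inputs]

    class SlimStyleExtractor(object):

      def extract_features(self, inputs):
        # Slim extractors expose graph construction as a named method.
        return [inputs]

    def get_feature_maps(extractor, images, use_keras):
      return extractor(images) if use_keras else extractor.extract_features(images)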