ModelZoo / ResNet50_tensorflow — Commit 74a03640 (unverified)

Merge pull request #2631 from tombstone/feature_extractors_update

feature extractor and model builder update.

Authored by vivek rathod, committed via GitHub, Oct 29, 2017.
Parents: ff88581a, 3237c080
Changes: 23 · Showing 20 changed files with 1714 additions and 77 deletions (+1714, −77)
research/object_detection/builders/BUILD (+7, −0)
research/object_detection/builders/model_builder.py (+29, −5)
research/object_detection/builders/model_builder_test.py (+287, −2)
research/object_detection/models/BUILD (+96, −1)
research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py (+124, −0)
research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py (+119, −0)
research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py (+9, −3)
research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py (+1, −0)
research/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py (+251, −0)
research/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py (+126, −0)
research/object_detection/models/faster_rcnn_nas_feature_extractor.py (+299, −0)
research/object_detection/models/faster_rcnn_nas_feature_extractor_test.py (+109, −0)
research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py (+19, −6)
research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py (+1, −0)
research/object_detection/models/feature_map_generators.py (+22, −31)
research/object_detection/models/feature_map_generators_test.py (+38, −1)
research/object_detection/models/ssd_feature_extractor_test.py (+13, −15)
research/object_detection/models/ssd_inception_v2_feature_extractor.py (+14, −2)
research/object_detection/models/ssd_inception_v2_feature_extractor_test.py (+39, −11)
research/object_detection/models/ssd_inception_v3_feature_extractor.py (+111, −0)
research/object_detection/builders/BUILD

@@ -24,9 +24,12 @@ py_library(
        "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
        "//tensorflow_models/object_detection/meta_architectures:rfcn_meta_arch",
        "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
        "//tensorflow_models/object_detection/models:embedded_ssd_mobilenet_v1_feature_extractor",
        "//tensorflow_models/object_detection/models:faster_rcnn_inception_resnet_v2_feature_extractor",
        "//tensorflow_models/object_detection/models:faster_rcnn_inception_v2_feature_extractor",
        "//tensorflow_models/object_detection/models:faster_rcnn_resnet_v1_feature_extractor",
        "//tensorflow_models/object_detection/models:ssd_inception_v2_feature_extractor",
        "//tensorflow_models/object_detection/models:ssd_inception_v3_feature_extractor",
        "//tensorflow_models/object_detection/models:ssd_mobilenet_v1_feature_extractor",
        "//tensorflow_models/object_detection/protos:model_py_pb2",
    ],

@@ -40,7 +43,11 @@ py_test(
        "//tensorflow",
        "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
        "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
        "//tensorflow_models/object_detection/models:faster_rcnn_inception_resnet_v2_feature_extractor",
        "//tensorflow_models/object_detection/models:faster_rcnn_inception_v2_feature_extractor",
        "//tensorflow_models/object_detection/models:faster_rcnn_resnet_v1_feature_extractor",
        "//tensorflow_models/object_detection/models:ssd_inception_v2_feature_extractor",
        "//tensorflow_models/object_detection/models:ssd_inception_v3_feature_extractor",
        "//tensorflow_models/object_detection/models:ssd_mobilenet_v1_feature_extractor",
        "//tensorflow_models/object_detection/protos:model_py_pb2",
    ],
research/object_detection/builders/model_builder.py

@@ -28,27 +28,37 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.meta_architectures import rfcn_meta_arch
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
from object_detection.protos import model_pb2

# A map of names to SSD feature extractors.
SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
    'ssd_inception_v2': SSDInceptionV2FeatureExtractor,
    'ssd_inception_v3': SSDInceptionV3FeatureExtractor,
    'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor,
    'embedded_ssd_mobilenet_v1': EmbeddedSSDMobileNetV1FeatureExtractor,
}

# A map of names to Faster R-CNN feature extractors.
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
    'faster_rcnn_nas':
    frcnn_nas.FasterRCNNNASFeatureExtractor,
    'faster_rcnn_inception_resnet_v2':
    frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor,
    'faster_rcnn_inception_v2':
    frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor,
    'faster_rcnn_resnet50':
    frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
    'faster_rcnn_resnet101':
    frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
    'faster_rcnn_resnet152':
    frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
}

@@ -94,6 +104,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
   feature_type = feature_extractor_config.type
   depth_multiplier = feature_extractor_config.depth_multiplier
   min_depth = feature_extractor_config.min_depth
+  pad_to_multiple = feature_extractor_config.pad_to_multiple
+  batch_norm_trainable = feature_extractor_config.batch_norm_trainable
   conv_hyperparams = hyperparams_builder.build(
       feature_extractor_config.conv_hyperparams, is_training)

@@ -101,8 +113,9 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
     raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
   feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
-  return feature_extractor_class(depth_multiplier, min_depth, conv_hyperparams,
-                                 reuse_weights)
+  return feature_extractor_class(is_training, depth_multiplier, min_depth,
+                                 pad_to_multiple, conv_hyperparams,
+                                 batch_norm_trainable, reuse_weights)

 def _build_ssd_model(ssd_config, is_training):

@@ -180,6 +193,7 @@ def _build_faster_rcnn_feature_extractor(
   feature_type = feature_extractor_config.type
   first_stage_features_stride = (
       feature_extractor_config.first_stage_features_stride)
+  batch_norm_trainable = feature_extractor_config.batch_norm_trainable

   if feature_type not in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP:
     raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format(

@@ -187,7 +201,8 @@ def _build_faster_rcnn_feature_extractor(
   feature_extractor_class = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
-  return feature_extractor_class(
-      is_training, first_stage_features_stride, reuse_weights)
+  return feature_extractor_class(
+      is_training, first_stage_features_stride,
+      batch_norm_trainable, reuse_weights)

 def _build_faster_rcnn_model(frcnn_config, is_training):

@@ -248,8 +263,13 @@ def _build_faster_rcnn_model(frcnn_config, is_training):
   ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
   second_stage_localization_loss_weight = (
       frcnn_config.second_stage_localization_loss_weight)
+  second_stage_classification_loss = (
+      losses_builder.build_faster_rcnn_classification_loss(
+          frcnn_config.second_stage_classification_loss))
   second_stage_classification_loss_weight = (
       frcnn_config.second_stage_classification_loss_weight)
+  second_stage_mask_prediction_loss_weight = (
+      frcnn_config.second_stage_mask_prediction_loss_weight)

   hard_example_miner = None
   if frcnn_config.HasField('hard_example_miner'):

@@ -286,6 +306,8 @@ def _build_faster_rcnn_model(frcnn_config, is_training):
       'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
       'second_stage_localization_loss_weight':
           second_stage_localization_loss_weight,
+      'second_stage_classification_loss':
+          second_stage_classification_loss,
       'second_stage_classification_loss_weight':
           second_stage_classification_loss_weight,
       'hard_example_miner': hard_example_miner}

@@ -300,4 +322,6 @@ def _build_faster_rcnn_model(frcnn_config, is_training):
       maxpool_kernel_size=maxpool_kernel_size,
       maxpool_stride=maxpool_stride,
       second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
+      second_stage_mask_prediction_loss_weight=(
+          second_stage_mask_prediction_loss_weight),
       **common_kwargs)
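For orientation, a minimal sketch of driving the updated builder directly. The config text mirrors the new test_create_ssd_inception_v3_model_from_config added below, so it exercises one of the newly registered SSD_FEATURE_EXTRACTOR_CLASS_MAP entries; this is an illustration of the dispatch path, not part of the diff.

```python
from google.protobuf import text_format

from object_detection.builders import model_builder
from object_detection.protos import model_pb2

# Same text proto shape as the ssd_inception_v3 test below.
model_text_proto = """
ssd {
  feature_extractor {
    type: 'ssd_inception_v3'
    conv_hyperparams {
      regularizer { l2_regularizer { } }
      initializer { truncated_normal_initializer { } }
    }
  }
  box_coder { faster_rcnn_box_coder { } }
  matcher { argmax_matcher { } }
  similarity_calculator { iou_similarity { } }
  anchor_generator { ssd_anchor_generator { aspect_ratios: 1.0 } }
  image_resizer { fixed_shape_resizer { height: 320 width: 320 } }
  box_predictor {
    convolutional_box_predictor {
      conv_hyperparams {
        regularizer { l2_regularizer { } }
        initializer { truncated_normal_initializer { } }
      }
    }
  }
  loss {
    classification_loss { weighted_softmax { } }
    localization_loss { weighted_smooth_l1 { } }
  }
}
"""

model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
# build() dispatches on the model oneof, then on feature_extractor.type
# through the class maps above, yielding an SSDMetaArch wrapping an
# SSDInceptionV3FeatureExtractor.
model = model_builder.build(model_proto, is_training=True)
```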
research/object_detection/builders/model_builder_test.py

@@ -23,8 +23,11 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.meta_architectures import rfcn_meta_arch
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
from object_detection.protos import model_pb2

@@ -123,6 +126,77 @@ class ModelBuilderTest(tf.test.TestCase):
    self.assertIsInstance(model._feature_extractor,
                          SSDInceptionV2FeatureExtractor)

  def test_create_ssd_inception_v3_model_from_config(self):
    model_text_proto = """
      ssd {
        feature_extractor {
          type: 'ssd_inception_v3'
          conv_hyperparams {
            regularizer {
              l2_regularizer {
              }
            }
            initializer {
              truncated_normal_initializer {
              }
            }
          }
        }
        box_coder {
          faster_rcnn_box_coder {
          }
        }
        matcher {
          argmax_matcher {
          }
        }
        similarity_calculator {
          iou_similarity {
          }
        }
        anchor_generator {
          ssd_anchor_generator {
            aspect_ratios: 1.0
          }
        }
        image_resizer {
          fixed_shape_resizer {
            height: 320
            width: 320
          }
        }
        box_predictor {
          convolutional_box_predictor {
            conv_hyperparams {
              regularizer {
                l2_regularizer {
                }
              }
              initializer {
                truncated_normal_initializer {
                }
              }
            }
          }
        }
        loss {
          classification_loss {
            weighted_softmax {
            }
          }
          localization_loss {
            weighted_smooth_l1 {
            }
          }
        }
      }"""
    model_proto = model_pb2.DetectionModel()
    text_format.Merge(model_text_proto, model_proto)
    model = self.create_model(model_proto)
    self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
    self.assertIsInstance(model._feature_extractor,
                          SSDInceptionV3FeatureExtractor)

  def test_create_ssd_mobilenet_v1_model_from_config(self):
    model_text_proto = """
      ssd {

@@ -138,6 +212,7 @@ class ModelBuilderTest(tf.test.TestCase):
            }
          }
        }
+       batch_norm_trainable: true
      }
      box_coder {
        faster_rcnn_box_coder {

@@ -193,6 +268,7 @@ class ModelBuilderTest(tf.test.TestCase):
    self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
    self.assertIsInstance(model._feature_extractor,
                          SSDMobileNetV1FeatureExtractor)
+   self.assertTrue(model._feature_extractor._batch_norm_trainable)

  def test_create_faster_rcnn_resnet_v1_models_from_config(self):
    model_text_proto = """

@@ -255,12 +331,155 @@ class ModelBuilderTest(tf.test.TestCase):
      }"""
    model_proto = model_pb2.DetectionModel()
    text_format.Merge(model_text_proto, model_proto)
-   for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items():
+   for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.iteritems():
      model_proto.faster_rcnn.feature_extractor.type = extractor_type
      model = model_builder.build(model_proto, is_training=True)
      self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
      self.assertIsInstance(model._feature_extractor, extractor_class)

  def test_create_faster_rcnn_resnet101_with_mask_prediction_enabled(self):
    model_text_proto = """
      faster_rcnn {
        num_classes: 3
        image_resizer {
          keep_aspect_ratio_resizer {
            min_dimension: 600
            max_dimension: 1024
          }
        }
        feature_extractor {
          type: 'faster_rcnn_resnet101'
        }
        first_stage_anchor_generator {
          grid_anchor_generator {
            scales: [0.25, 0.5, 1.0, 2.0]
            aspect_ratios: [0.5, 1.0, 2.0]
            height_stride: 16
            width_stride: 16
          }
        }
        first_stage_box_predictor_conv_hyperparams {
          regularizer {
            l2_regularizer {
            }
          }
          initializer {
            truncated_normal_initializer {
            }
          }
        }
        initial_crop_size: 14
        maxpool_kernel_size: 2
        maxpool_stride: 2
        second_stage_box_predictor {
          mask_rcnn_box_predictor {
            fc_hyperparams {
              op: FC
              regularizer {
                l2_regularizer {
                }
              }
              initializer {
                truncated_normal_initializer {
                }
              }
            }
            conv_hyperparams {
              regularizer {
                l2_regularizer {
                }
              }
              initializer {
                truncated_normal_initializer {
                }
              }
            }
            predict_instance_masks: true
          }
        }
        second_stage_mask_prediction_loss_weight: 3.0
        second_stage_post_processing {
          batch_non_max_suppression {
            score_threshold: 0.01
            iou_threshold: 0.6
            max_detections_per_class: 100
            max_total_detections: 300
          }
          score_converter: SOFTMAX
        }
      }"""
    model_proto = model_pb2.DetectionModel()
    text_format.Merge(model_text_proto, model_proto)
    model = model_builder.build(model_proto, is_training=True)
    self.assertAlmostEqual(model._second_stage_mask_loss_weight, 3.0)

  def test_create_faster_rcnn_nas_model_from_config(self):
    model_text_proto = """
      faster_rcnn {
        num_classes: 3
        image_resizer {
          keep_aspect_ratio_resizer {
            min_dimension: 600
            max_dimension: 1024
          }
        }
        feature_extractor {
          type: 'faster_rcnn_nas'
        }
        first_stage_anchor_generator {
          grid_anchor_generator {
            scales: [0.25, 0.5, 1.0, 2.0]
            aspect_ratios: [0.5, 1.0, 2.0]
            height_stride: 16
            width_stride: 16
          }
        }
        first_stage_box_predictor_conv_hyperparams {
          regularizer {
            l2_regularizer {
            }
          }
          initializer {
            truncated_normal_initializer {
            }
          }
        }
        initial_crop_size: 17
        maxpool_kernel_size: 1
        maxpool_stride: 1
        second_stage_box_predictor {
          mask_rcnn_box_predictor {
            fc_hyperparams {
              op: FC
              regularizer {
                l2_regularizer {
                }
              }
              initializer {
                truncated_normal_initializer {
                }
              }
            }
          }
        }
        second_stage_post_processing {
          batch_non_max_suppression {
            score_threshold: 0.01
            iou_threshold: 0.6
            max_detections_per_class: 100
            max_total_detections: 300
          }
          score_converter: SOFTMAX
        }
      }"""
    model_proto = model_pb2.DetectionModel()
    text_format.Merge(model_text_proto, model_proto)
    model = model_builder.build(model_proto, is_training=True)
    self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
    self.assertIsInstance(
        model._feature_extractor,
        frcnn_nas.FasterRCNNNASFeatureExtractor)

  def test_create_faster_rcnn_inception_resnet_v2_model_from_config(self):
    model_text_proto = """
      faster_rcnn {

@@ -328,6 +547,72 @@ class ModelBuilderTest(tf.test.TestCase):
        model._feature_extractor,
        frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor)

  def test_create_faster_rcnn_inception_v2_model_from_config(self):
    model_text_proto = """
      faster_rcnn {
        num_classes: 3
        image_resizer {
          keep_aspect_ratio_resizer {
            min_dimension: 600
            max_dimension: 1024
          }
        }
        feature_extractor {
          type: 'faster_rcnn_inception_v2'
        }
        first_stage_anchor_generator {
          grid_anchor_generator {
            scales: [0.25, 0.5, 1.0, 2.0]
            aspect_ratios: [0.5, 1.0, 2.0]
            height_stride: 16
            width_stride: 16
          }
        }
        first_stage_box_predictor_conv_hyperparams {
          regularizer {
            l2_regularizer {
            }
          }
          initializer {
            truncated_normal_initializer {
            }
          }
        }
        initial_crop_size: 14
        maxpool_kernel_size: 2
        maxpool_stride: 2
        second_stage_box_predictor {
          mask_rcnn_box_predictor {
            fc_hyperparams {
              op: FC
              regularizer {
                l2_regularizer {
                }
              }
              initializer {
                truncated_normal_initializer {
                }
              }
            }
          }
        }
        second_stage_post_processing {
          batch_non_max_suppression {
            score_threshold: 0.01
            iou_threshold: 0.6
            max_detections_per_class: 100
            max_total_detections: 300
          }
          score_converter: SOFTMAX
        }
      }"""
    model_proto = model_pb2.DetectionModel()
    text_format.Merge(model_text_proto, model_proto)
    model = model_builder.build(model_proto, is_training=True)
    self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
    self.assertIsInstance(
        model._feature_extractor,
        frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor)

  def test_create_faster_rcnn_model_from_config_with_example_miner(self):
    model_text_proto = """
      faster_rcnn {

@@ -445,7 +730,7 @@ class ModelBuilderTest(tf.test.TestCase):
      }"""
    model_proto = model_pb2.DetectionModel()
    text_format.Merge(model_text_proto, model_proto)
-   for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items():
+   for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.iteritems():
      model_proto.faster_rcnn.feature_extractor.type = extractor_type
      model = model_builder.build(model_proto, is_training=True)
      self.assertIsInstance(model, rfcn_meta_arch.RFCNMetaArch)
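The loops above iterate a module-level FEATURE_EXTRACTOR_MAPS dict that sits outside the visible hunks (note the switch to .iteritems() pins these tests to Python 2). A hypothetical reconstruction of its shape, inferred from the classes the loops assert against; the real definition may differ:

```python
# Hypothetical: maps feature_extractor.type strings to the extractor classes
# asserted in the loops above. Not the verbatim source.
FEATURE_EXTRACTOR_MAPS = {
    'faster_rcnn_resnet50': frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
    'faster_rcnn_resnet101':
        frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
    'faster_rcnn_resnet152':
        frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
}
```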
research/object_detection/models/BUILD

@@ -15,7 +15,6 @@ py_library(
    ],
    deps = [
        "//tensorflow",
        "//tensorflow_models/object_detection/utils:ops",
    ],
)

@@ -49,10 +48,25 @@ py_library(
        ":feature_map_generators",
        "//tensorflow",
        "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
        "//tensorflow_models/object_detection/utils:ops",
        "//tensorflow_models/slim:inception_v2",
    ],
)

py_library(
    name = "ssd_inception_v3_feature_extractor",
    srcs = ["ssd_inception_v3_feature_extractor.py"],
    deps = [
        ":feature_map_generators",
        "//tensorflow",
        "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
        "//tensorflow_models/object_detection/utils:ops",
        "//tensorflow_models/slim:inception_v3",
    ],
)

py_library(
    name = "ssd_mobilenet_v1_feature_extractor",
    srcs = ["ssd_mobilenet_v1_feature_extractor.py"],

@@ -60,6 +74,19 @@ py_library(
        ":feature_map_generators",
        "//tensorflow",
        "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
        "//tensorflow_models/object_detection/utils:ops",
        "//tensorflow_models/slim:mobilenet_v1",
    ],
)

py_library(
    name = "embedded_ssd_mobilenet_v1_feature_extractor",
    srcs = ["embedded_ssd_mobilenet_v1_feature_extractor.py"],
    deps = [
        ":feature_map_generators",
        ":ssd_mobilenet_v1_feature_extractor",
        "//tensorflow",
        "//tensorflow_models/object_detection/utils:ops",
        "//tensorflow_models/slim:mobilenet_v1",
    ],
)

@@ -76,6 +103,18 @@ py_test(
    ],
)

py_test(
    name = "ssd_inception_v3_feature_extractor_test",
    srcs = ["ssd_inception_v3_feature_extractor_test.py"],
    deps = [
        ":ssd_feature_extractor_test",
        ":ssd_inception_v3_feature_extractor",
        "//tensorflow",
    ],
)

py_test(
    name = "ssd_mobilenet_v1_feature_extractor_test",
    srcs = ["ssd_mobilenet_v1_feature_extractor_test.py"],

@@ -86,6 +125,39 @@ py_test(
    ],
)

py_test(
    name = "embedded_ssd_mobilenet_v1_feature_extractor_test",
    srcs = ["embedded_ssd_mobilenet_v1_feature_extractor_test.py"],
    deps = [
        ":embedded_ssd_mobilenet_v1_feature_extractor",
        ":ssd_feature_extractor_test",
        "//tensorflow",
    ],
)

py_test(
    name = "faster_rcnn_nas_feature_extractor_test",
    srcs = ["faster_rcnn_nas_feature_extractor_test.py"],
    deps = [
        ":faster_rcnn_nas_feature_extractor",
        "//tensorflow",
    ],
)

py_library(
    name = "faster_rcnn_nas_feature_extractor",
    srcs = ["faster_rcnn_nas_feature_extractor.py"],
    deps = [
        "//tensorflow",
        "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
        "//tensorflow_models/slim:nasnet",
    ],
)

py_library(
    name = "faster_rcnn_inception_resnet_v2_feature_extractor",
    srcs = [

@@ -109,6 +181,29 @@ py_test(
    ],
)

py_library(
    name = "faster_rcnn_inception_v2_feature_extractor",
    srcs = ["faster_rcnn_inception_v2_feature_extractor.py"],
    deps = [
        "//tensorflow",
        "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
        "//tensorflow_models/slim:inception_v2",
    ],
)

py_test(
    name = "faster_rcnn_inception_v2_feature_extractor_test",
    srcs = ["faster_rcnn_inception_v2_feature_extractor_test.py"],
    deps = [
        ":faster_rcnn_inception_v2_feature_extractor",
        "//tensorflow",
    ],
)

py_library(
    name = "faster_rcnn_resnet_v1_feature_extractor",
    srcs = [
research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py (new file, mode 100644)

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Embedded-friendly SSDFeatureExtractor for MobilenetV1 features."""

import tensorflow as tf

from object_detection.models import feature_map_generators
from object_detection.models import ssd_mobilenet_v1_feature_extractor
from object_detection.utils import ops
from nets import mobilenet_v1

slim = tf.contrib.slim


class EmbeddedSSDMobileNetV1FeatureExtractor(
    ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor):
  """Embedded-friendly SSD Feature Extractor using MobilenetV1 features.

  This feature extractor is similar to SSD MobileNetV1 feature extractor, and
  it fixes input resolution to be 256x256, reduces the number of feature maps
  used for box prediction and ensures convolution kernel to be no larger
  than input tensor in spatial dimensions.

  This feature extractor requires support of the following ops if used in
  embedded devices:
  - Conv
  - DepthwiseConv
  - Relu6

  All conv/depthwiseconv use SAME padding, and no additional spatial padding is
  needed.
  """

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               batch_norm_trainable=True,
               reuse_weights=None):
    """MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to. For EmbeddedSSD it must be set to 1.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      batch_norm_trainable: Whether to update batch norm parameters during
        training or not. When training with a small batch size (e.g. 1), it is
        desirable to disable batch norm update and use pretrained batch norm
        params.
      reuse_weights: Whether to reuse variables. Default is None.

    Raises:
      ValueError: upon invalid `pad_to_multiple` values.
    """
    if pad_to_multiple != 1:
      raise ValueError('Embedded-specific SSD only supports `pad_to_multiple` '
                       'of 1.')

    super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, batch_norm_trainable, reuse_weights)

  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                       tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
        ['image size must be 256 in both height and width.'])

    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       ''],
        'layer_depth': [-1, -1, 512, 256, 256],
        'conv_kernel_size': [-1, -1, 3, 3, 2],
    }

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(self._conv_hyperparams):
        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
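A minimal usage sketch under the TF 1.x graph API this file targets. Passing an empty dict for conv_hyperparams is enough to construct the object, as the test below also does; the expected output shapes come from that test:

```python
import tensorflow as tf

from object_detection.models import embedded_ssd_mobilenet_v1_feature_extractor

# Empty conv_hyperparams (normally produced by hyperparams_builder.build()).
extractor = (embedded_ssd_mobilenet_v1_feature_extractor
             .EmbeddedSSDMobileNetV1FeatureExtractor(
                 is_training=False, depth_multiplier=1.0, min_depth=32,
                 pad_to_multiple=1, conv_hyperparams={}))

# The input resolution is fixed at 256x256; other sizes fail the tf.Assert.
images = tf.placeholder(tf.float32, [4, 256, 256, 3])
feature_maps = extractor.extract_features(extractor.preprocess(images))
# With depth_multiplier=1.0 the five maps come out as 16x16x512, 8x8x1024,
# 4x4x512, 2x2x256 and 1x1x256 (see the shape test below).
```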
research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py (new file, mode 100644)

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for embedded_ssd_mobilenet_v1_feature_extractor."""

import numpy as np
import tensorflow as tf

from object_detection.models import embedded_ssd_mobilenet_v1_feature_extractor
from object_detection.models import ssd_feature_extractor_test


class EmbeddedSSDMobileNetV1FeatureExtractorTest(
    ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):

  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                is_training=True, batch_norm_trainable=True):
    """Constructs a new feature extractor.

    Args:
      depth_multiplier: float depth multiplier for feature extractor
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      is_training: whether the network is in training mode.
      batch_norm_trainable: whether to update batch norm parameters during
        training.

    Returns:
      an ssd_meta_arch.SSDFeatureExtractor object.
    """
    min_depth = 32
    conv_hyperparams = {}
    return (embedded_ssd_mobilenet_v1_feature_extractor
            .EmbeddedSSDMobileNetV1FeatureExtractor(
                is_training, depth_multiplier, min_depth, pad_to_multiple,
                conv_hyperparams, batch_norm_trainable))

  def test_extract_features_returns_correct_shapes_256(self):
    image_height = 256
    image_width = 256
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024),
                                  (4, 4, 4, 512), (4, 2, 2, 256),
                                  (4, 1, 1, 256)]
    self.check_extract_features_returns_correct_shape(
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
    image_height = 256
    image_width = 256
    depth_multiplier = 0.5 ** 12
    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 16, 16, 32), (4, 8, 8, 32),
                                  (4, 4, 4, 32), (4, 2, 2, 32),
                                  (4, 1, 1, 32)]
    self.check_extract_features_returns_correct_shape(
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1(
      self):
    image_height = 256
    image_width = 256
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024),
                                  (4, 4, 4, 512), (4, 2, 2, 256),
                                  (4, 1, 1, 256)]
    self.check_extract_features_returns_correct_shape(
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_raises_error_with_pad_to_multiple_not_1(self):
    depth_multiplier = 1.0
    pad_to_multiple = 2
    with self.assertRaises(ValueError):
      _ = self._create_feature_extractor(depth_multiplier, pad_to_multiple)

  def test_extract_features_raises_error_with_invalid_image_size(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1.0
    pad_to_multiple = 1
    self.check_extract_features_raises_error_with_invalid_image_size(
        image_height, image_width, depth_multiplier, pad_to_multiple)

  def test_preprocess_returns_correct_value_range(self):
    image_height = 256
    image_width = 256
    depth_multiplier = 1
    pad_to_multiple = 1
    test_image = np.random.rand(4, image_height, image_width, 3)
    feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                       pad_to_multiple)
    preprocessed_image = feature_extractor.preprocess(test_image)
    self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))

  def test_variables_only_created_in_scope(self):
    depth_multiplier = 1
    pad_to_multiple = 1
    scope_name = 'MobilenetV1'
    self.check_feature_extractor_variables_under_scope(
        depth_multiplier, pad_to_multiple, scope_name)


if __name__ == '__main__':
  tf.test.main()
research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py

@@ -37,6 +37,7 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
   def __init__(self,
                is_training,
                first_stage_features_stride,
+               batch_norm_trainable=False,
                reuse_weights=None,
                weight_decay=0.0):
     """Constructor.

@@ -44,6 +45,7 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
     Args:
       is_training: See base class.
       first_stage_features_stride: See base class.
+      batch_norm_trainable: See base class.
       reuse_weights: See base class.
       weight_decay: See base class.

@@ -53,7 +55,8 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
     if first_stage_features_stride != 8 and first_stage_features_stride != 16:
       raise ValueError('`first_stage_features_stride` must be 8 or 16.')
     super(FasterRCNNInceptionResnetV2FeatureExtractor, self).__init__(
-        is_training, first_stage_features_stride, reuse_weights, weight_decay)
+        is_training, first_stage_features_stride, batch_norm_trainable,
+        reuse_weights, weight_decay)

   def preprocess(self, resized_inputs):
     """Faster R-CNN with Inception Resnet v2 preprocessing.

@@ -98,7 +101,8 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
     with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
         weight_decay=self._weight_decay)):
-      # Forces is_training to False to disable batch norm update.
-      with slim.arg_scope([slim.batch_norm], is_training=False):
+      with slim.arg_scope([slim.batch_norm],
+                          is_training=self._train_batch_norm):
         with tf.variable_scope('InceptionResnetV2',
                                reuse=self._reuse_weights) as scope:
           rpn_feature_map, _ = (

@@ -129,7 +133,8 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
     with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
         weight_decay=self._weight_decay)):
-      # Forces is_training to False to disable batch norm update.
-      with slim.arg_scope([slim.batch_norm], is_training=False):
+      with slim.arg_scope([slim.batch_norm],
+                          is_training=self._train_batch_norm):
         with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                             stride=1, padding='SAME'):
           with tf.variable_scope('Mixed_7a'):

@@ -207,3 +212,4 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
             second_stage_feature_extractor_scope + '/', '')
         variables_to_restore[var_name] = variable
     return variables_to_restore
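The change above replaces the hardcoded is_training=False on slim.batch_norm with self._train_batch_norm. A short sketch of the resulting knob; the base-class wiring is not shown in this diff, so the assumption here is that _train_batch_norm folds together is_training and batch_norm_trainable, which matches how every extractor in this commit consumes it:

```python
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res

# batch_norm_trainable=False keeps slim.batch_norm in inference mode inside
# both extraction scopes even when the model itself is training, preserving
# pretrained batch-norm statistics (useful for very small batch sizes).
extractor = frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor(
    is_training=True,
    first_stage_features_stride=16,
    batch_norm_trainable=False,
    reuse_weights=None,
    weight_decay=0.0)
```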
research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py

@@ -26,6 +26,7 @@ class FasterRcnnInceptionResnetV2FeatureExtractorTest(tf.test.TestCase):
     return frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor(
         is_training=False,
         first_stage_features_stride=first_stage_features_stride,
+        batch_norm_trainable=False,
         reuse_weights=None,
         weight_decay=0.0)
research/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py (new file, mode 100644)

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Inception V2 Faster R-CNN implementation.

See "Rethinking the Inception Architecture for Computer Vision"
https://arxiv.org/abs/1512.00567
"""

import tensorflow as tf

from object_detection.meta_architectures import faster_rcnn_meta_arch
from nets import inception_v2

slim = tf.contrib.slim


def _batch_norm_arg_scope(list_ops,
                          use_batch_norm=True,
                          batch_norm_decay=0.9997,
                          batch_norm_epsilon=0.001,
                          batch_norm_scale=False,
                          train_batch_norm=False):
  """Slim arg scope for InceptionV2 batch norm."""
  if use_batch_norm:
    batch_norm_params = {
        'is_training': train_batch_norm,
        'scale': batch_norm_scale,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon
    }
    normalizer_fn = slim.batch_norm
  else:
    normalizer_fn = None
    batch_norm_params = None

  return slim.arg_scope(list_ops,
                        normalizer_fn=normalizer_fn,
                        normalizer_params=batch_norm_params)


class FasterRCNNInceptionV2FeatureExtractor(
    faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
  """Faster R-CNN Inception V2 feature extractor implementation."""

  def __init__(self,
               is_training,
               first_stage_features_stride,
               batch_norm_trainable=False,
               reuse_weights=None,
               weight_decay=0.0,
               depth_multiplier=1.0,
               min_depth=16):
    """Constructor.

    Args:
      is_training: See base class.
      first_stage_features_stride: See base class.
      batch_norm_trainable: See base class.
      reuse_weights: See base class.
      weight_decay: See base class.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.

    Raises:
      ValueError: If `first_stage_features_stride` is not 8 or 16.
    """
    if first_stage_features_stride != 8 and first_stage_features_stride != 16:
      raise ValueError('`first_stage_features_stride` must be 8 or 16.')
    self._depth_multiplier = depth_multiplier
    self._min_depth = min_depth
    super(FasterRCNNInceptionV2FeatureExtractor, self).__init__(
        is_training, first_stage_features_stride, batch_norm_trainable,
        reuse_weights, weight_decay)

  def preprocess(self, resized_inputs):
    """Faster R-CNN Inception V2 preprocessing.

    Maps pixel values to the range [-1, 1].

    Args:
      resized_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
    """
    return (2.0 / 255.0) * resized_inputs - 1.0

  def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    with tf.control_dependencies([shape_assert]):
      with tf.variable_scope('InceptionV2',
                             reuse=self._reuse_weights) as scope:
        with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
                                   batch_norm_scale=True,
                                   train_batch_norm=self._train_batch_norm):
          _, activations = inception_v2.inception_v2_base(
              preprocessed_inputs,
              final_endpoint='Mixed_4e',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)

    return activations['Mixed_4e']

  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    net = proposal_feature_maps

    depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
    trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)

    data_format = 'NHWC'
    concat_dim = 3 if data_format == 'NHWC' else 1

    with tf.variable_scope('InceptionV2', reuse=self._reuse_weights):
      with slim.arg_scope(
          [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
          stride=1,
          padding='SAME',
          data_format=data_format):
        with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
                                   batch_norm_scale=True,
                                   train_batch_norm=self._train_batch_norm):

          with tf.variable_scope('Mixed_5a'):
            with tf.variable_scope('Branch_0'):
              branch_0 = slim.conv2d(
                  net, depth(128), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2,
                                     scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_1'):
              branch_1 = slim.conv2d(
                  net, depth(192), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],
                                     scope='Conv2d_0b_3x3')
              branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,
                                     scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_2'):
              branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
                                         scope='MaxPool_1a_3x3')
            net = tf.concat([branch_0, branch_1, branch_2], concat_dim)

          with tf.variable_scope('Mixed_5b'):
            with tf.variable_scope('Branch_0'):
              branch_0 = slim.conv2d(net, depth(352), [1, 1],
                                     scope='Conv2d_0a_1x1')
            with tf.variable_scope('Branch_1'):
              branch_1 = slim.conv2d(
                  net, depth(192), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                     scope='Conv2d_0b_3x3')
            with tf.variable_scope('Branch_2'):
              branch_2 = slim.conv2d(
                  net, depth(160), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                     scope='Conv2d_0b_3x3')
              branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                     scope='Conv2d_0c_3x3')
            with tf.variable_scope('Branch_3'):
              branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
              branch_3 = slim.conv2d(
                  branch_3, depth(128), [1, 1],
                  weights_initializer=trunc_normal(0.1),
                  scope='Conv2d_0b_1x1')
            net = tf.concat([branch_0, branch_1, branch_2, branch_3],
                            concat_dim)

          with tf.variable_scope('Mixed_5c'):
            with tf.variable_scope('Branch_0'):
              branch_0 = slim.conv2d(net, depth(352), [1, 1],
                                     scope='Conv2d_0a_1x1')
            with tf.variable_scope('Branch_1'):
              branch_1 = slim.conv2d(
                  net, depth(192), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                     scope='Conv2d_0b_3x3')
            with tf.variable_scope('Branch_2'):
              branch_2 = slim.conv2d(
                  net, depth(192), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                     scope='Conv2d_0b_3x3')
              branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                     scope='Conv2d_0c_3x3')
            with tf.variable_scope('Branch_3'):
              branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
              branch_3 = slim.conv2d(
                  branch_3, depth(128), [1, 1],
                  weights_initializer=trunc_normal(0.1),
                  scope='Conv2d_0b_1x1')
            proposal_classifier_features = tf.concat(
                [branch_0, branch_1, branch_2, branch_3], concat_dim)

    return proposal_classifier_features
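The preprocess() method above is a plain affine rescale of pixel values. A quick numeric check (standalone NumPy sketch of the same formula, outside the TF graph):

```python
import numpy as np

# preprocess() maps [0, 255] pixel values onto [-1, 1]:
#   preprocessed = (2.0 / 255.0) * x - 1.0
pixels = np.array([0.0, 127.5, 255.0])
print((2.0 / 255.0) * pixels - 1.0)  # -> [-1.  0.  1.]
```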
research/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py (new file, mode 100644)

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for faster_rcnn_inception_v2_feature_extractor."""

import numpy as np
import tensorflow as tf

from object_detection.models import faster_rcnn_inception_v2_feature_extractor as faster_rcnn_inception_v2


class FasterRcnnInceptionV2FeatureExtractorTest(tf.test.TestCase):

  def _build_feature_extractor(self, first_stage_features_stride):
    return faster_rcnn_inception_v2.FasterRCNNInceptionV2FeatureExtractor(
        is_training=False,
        first_stage_features_stride=first_stage_features_stride,
        batch_norm_trainable=False,
        reuse_weights=None,
        weight_decay=0.0)

  def test_extract_proposal_features_returns_expected_size(self):
    feature_extractor = self._build_feature_extractor(
        first_stage_features_stride=16)
    preprocessed_inputs = tf.random_uniform(
        [4, 224, 224, 3], maxval=255, dtype=tf.float32)
    rpn_feature_map = feature_extractor.extract_proposal_features(
        preprocessed_inputs, scope='TestScope')
    features_shape = tf.shape(rpn_feature_map)

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      features_shape_out = sess.run(features_shape)
      self.assertAllEqual(features_shape_out, [4, 14, 14, 576])

  def test_extract_proposal_features_stride_eight(self):
    feature_extractor = self._build_feature_extractor(
        first_stage_features_stride=8)
    preprocessed_inputs = tf.random_uniform(
        [4, 224, 224, 3], maxval=255, dtype=tf.float32)
    rpn_feature_map = feature_extractor.extract_proposal_features(
        preprocessed_inputs, scope='TestScope')
    features_shape = tf.shape(rpn_feature_map)

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      features_shape_out = sess.run(features_shape)
      self.assertAllEqual(features_shape_out, [4, 14, 14, 576])

  def test_extract_proposal_features_half_size_input(self):
    feature_extractor = self._build_feature_extractor(
        first_stage_features_stride=16)
    preprocessed_inputs = tf.random_uniform(
        [1, 112, 112, 3], maxval=255, dtype=tf.float32)
    rpn_feature_map = feature_extractor.extract_proposal_features(
        preprocessed_inputs, scope='TestScope')
    features_shape = tf.shape(rpn_feature_map)

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      features_shape_out = sess.run(features_shape)
      self.assertAllEqual(features_shape_out, [1, 7, 7, 576])

  def test_extract_proposal_features_dies_on_invalid_stride(self):
    with self.assertRaises(ValueError):
      self._build_feature_extractor(first_stage_features_stride=99)

  def test_extract_proposal_features_dies_on_very_small_images(self):
    feature_extractor = self._build_feature_extractor(
        first_stage_features_stride=16)
    preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
    rpn_feature_map = feature_extractor.extract_proposal_features(
        preprocessed_inputs, scope='TestScope')
    features_shape = tf.shape(rpn_feature_map)

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      with self.assertRaises(tf.errors.InvalidArgumentError):
        sess.run(features_shape,
                 feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)})

  def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
    feature_extractor = self._build_feature_extractor(
        first_stage_features_stride=16)
    preprocessed_inputs = tf.random_uniform(
        [224, 224, 3], maxval=255, dtype=tf.float32)
    with self.assertRaises(ValueError):
      feature_extractor.extract_proposal_features(
          preprocessed_inputs, scope='TestScope')

  def test_extract_box_classifier_features_returns_expected_size(self):
    feature_extractor = self._build_feature_extractor(
        first_stage_features_stride=16)
    proposal_feature_maps = tf.random_uniform(
        [3, 14, 14, 576], maxval=255, dtype=tf.float32)
    proposal_classifier_features = (
        feature_extractor.extract_box_classifier_features(
            proposal_feature_maps, scope='TestScope'))
    features_shape = tf.shape(proposal_classifier_features)

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      features_shape_out = sess.run(features_shape)
      self.assertAllEqual(features_shape_out, [3, 7, 7, 1024])


if __name__ == '__main__':
  tf.test.main()
research/object_detection/models/faster_rcnn_nas_feature_extractor.py
0 → 100644
View file @
74a03640
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""NASNet Faster R-CNN implementation.
Learning Transferable Architectures for Scalable Image Recognition
Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc V. Le
https://arxiv.org/abs/1707.07012
"""
import
tensorflow
as
tf
from
object_detection.meta_architectures
import
faster_rcnn_meta_arch
from
nets.nasnet
import
nasnet
from
nets.nasnet
import
nasnet_utils
arg_scope
=
tf
.
contrib
.
framework
.
arg_scope
slim
=
tf
.
contrib
.
slim
# Note: This is largely a copy of _build_nasnet_base inside nasnet.py but
# with special edits to remove instantiation of the stem and the special
# ability to receive as input a pair of hidden states.
def
_build_nasnet_base
(
hidden_previous
,
hidden
,
normal_cell
,
reduction_cell
,
hparams
,
true_cell_num
,
start_cell_num
):
"""Constructs a NASNet image model."""
# Find where to place the reduction cells or stride normal cells
reduction_indices
=
nasnet_utils
.
calc_reduction_layers
(
hparams
.
num_cells
,
hparams
.
num_reduction_layers
)
# Note: The None is prepended to match the behavior of _imagenet_stem()
cell_outputs
=
[
None
,
hidden_previous
,
hidden
]
net
=
hidden
# NOTE: In the nasnet.py code, filter_scaling starts at 1.0. We instead
# start at 2.0 because 1 reduction cell has been created which would
# update the filter_scaling to 2.0.
filter_scaling
=
2.0
# Run the cells
for
cell_num
in
range
(
start_cell_num
,
hparams
.
num_cells
):
stride
=
1
if
hparams
.
skip_reduction_layer_input
:
prev_layer
=
cell_outputs
[
-
2
]
if
cell_num
in
reduction_indices
:
filter_scaling
*=
hparams
.
filter_scaling_rate
net
=
reduction_cell
(
net
,
scope
=
'reduction_cell_{}'
.
format
(
reduction_indices
.
index
(
cell_num
)),
filter_scaling
=
filter_scaling
,
stride
=
2
,
prev_layer
=
cell_outputs
[
-
2
],
cell_num
=
true_cell_num
)
true_cell_num
+=
1
cell_outputs
.
append
(
net
)
if
not
hparams
.
skip_reduction_layer_input
:
prev_layer
=
cell_outputs
[
-
2
]
net
=
normal_cell
(
net
,
scope
=
'cell_{}'
.
format
(
cell_num
),
filter_scaling
=
filter_scaling
,
stride
=
stride
,
prev_layer
=
prev_layer
,
cell_num
=
true_cell_num
)
true_cell_num
+=
1
cell_outputs
.
append
(
net
)
# Final nonlinearity.
# Note that we have dropped the final pooling, dropout and softmax layers
# from the default nasnet version.
with
tf
.
variable_scope
(
'final_layer'
):
net
=
tf
.
nn
.
relu
(
net
)
return
net
# TODO: Only fixed_shape_resizer is currently supported for NASNet
# featurization. The reason for this is that nasnet.py only supports
# inputs with fully known shapes. We need to update nasnet.py to handle
# shapes not known at compile time.
class
FasterRCNNNASFeatureExtractor
(
faster_rcnn_meta_arch
.
FasterRCNNFeatureExtractor
):
"""Faster R-CNN with NASNet-A feature extractor implementation."""
def
__init__
(
self
,
is_training
,
first_stage_features_stride
,
batch_norm_trainable
=
False
,
reuse_weights
=
None
,
weight_decay
=
0.0
):
"""Constructor.
Args:
is_training: See base class.
first_stage_features_stride: See base class.
batch_norm_trainable: See base class.
reuse_weights: See base class.
weight_decay: See base class.
Raises:
ValueError: If `first_stage_features_stride` is not 16.
"""
if
first_stage_features_stride
!=
16
:
raise
ValueError
(
'`first_stage_features_stride` must be 16.'
)
super
(
FasterRCNNNASFeatureExtractor
,
self
).
__init__
(
is_training
,
first_stage_features_stride
,
batch_norm_trainable
,
reuse_weights
,
weight_decay
)
def
preprocess
(
self
,
resized_inputs
):
"""Faster R-CNN with NAS preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
representing a batch of images with values between 0 and 255.0.
Returns:
preprocessed_inputs: A [batch, height_out, width_out, channels] float32
tensor representing a batch of images.
"""
return
(
2.0
/
255.0
)
*
resized_inputs
-
1.0
def
_extract_proposal_features
(
self
,
preprocessed_inputs
,
scope
):
"""Extracts first stage RPN features.
Extracts features using the first half of the NASNet network.
We construct the network in `align_feature_maps=True` mode, which means
that all VALID paddings in the network are changed to SAME padding so that
the feature maps are aligned.
Args:
preprocessed_inputs: A [batch, height, width, channels] float32 tensor
representing a batch of images.
scope: A scope name.
Returns:
rpn_feature_map: A tensor with shape [batch, height, width, depth]
Raises:
ValueError: If the created network is missing the required activation.
"""
del
scope
if
len
(
preprocessed_inputs
.
get_shape
().
as_list
())
!=
4
:
raise
ValueError
(
'`preprocessed_inputs` must be 4 dimensional, got a '
'tensor of shape %s'
%
preprocessed_inputs
.
get_shape
())
with
slim
.
arg_scope
(
nasnet
.
nasnet_large_arg_scope
()):
_
,
end_points
=
nasnet
.
build_nasnet_large
(
preprocessed_inputs
,
num_classes
=
None
,
is_training
=
self
.
_is_training
,
is_batchnorm_training
=
self
.
_train_batch_norm
,
final_endpoint
=
'Cell_11'
)
# Note that both 'Cell_10' and 'Cell_11' have equal depth = 2016.
rpn_feature_map
=
tf
.
concat
([
end_points
[
'Cell_10'
],
end_points
[
'Cell_11'
]],
3
)
# nasnet.py does not maintain the batch size in the first dimension.
# This work around permits us retaining the batch for below.
batch
=
preprocessed_inputs
.
get_shape
().
as_list
()[
0
]
shape_without_batch
=
rpn_feature_map
.
get_shape
().
as_list
()[
1
:]
rpn_feature_map_shape
=
[
batch
]
+
shape_without_batch
rpn_feature_map
.
set_shape
(
rpn_feature_map_shape
)
return
rpn_feature_map
  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    This function reconstructs the "second half" of the NASNet-A
    network after the part defined in `_extract_proposal_features`.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name.

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    del scope

    # Note that we always feed into 2 layers of equal depth, where the first
    # N channels correspond to the previous hidden layer and the second N
    # channels correspond to the final hidden layer.
    hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3)

    # Note that what follows is largely a copy of build_nasnet_large() within
    # nasnet.py. We are copying to minimize code pollution in slim.

    # pylint: disable=protected-access
    hparams = nasnet._large_imagenet_config(is_training=self._is_training)
    # pylint: enable=protected-access

    # Calculate the total number of cells in the network.
    # -- Add 2 for the reduction cells.
    total_num_cells = hparams.num_cells + 2
    # -- And add 2 for the stem cells for ImageNet training.
    total_num_cells += 2

    normal_cell = nasnet_utils.NasNetANormalCell(
        hparams.num_conv_filters, hparams.drop_path_keep_prob,
        total_num_cells, hparams.total_training_steps)
    reduction_cell = nasnet_utils.NasNetAReductionCell(
        hparams.num_conv_filters, hparams.drop_path_keep_prob,
        total_num_cells, hparams.total_training_steps)
    with arg_scope([slim.dropout, nasnet_utils.drop_path],
                   is_training=self._is_training):
      with arg_scope([slim.batch_norm], is_training=self._train_batch_norm):
        with arg_scope([slim.avg_pool2d,
                        slim.max_pool2d,
                        slim.conv2d,
                        slim.batch_norm,
                        slim.separable_conv2d,
                        nasnet_utils.factorized_reduction,
                        nasnet_utils.global_avg_pool,
                        nasnet_utils.get_channel_index,
                        nasnet_utils.get_channel_dim],
                       data_format=hparams.data_format):
          # This corresponds to the cell number just past 'Cell_11' used by
          # _extract_proposal_features().
          start_cell_num = 12
          # Note that this number equals:
          #   start_cell_num + 2 stem cells + 1 reduction cell
          true_cell_num = 15

          with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
            net = _build_nasnet_base(hidden_previous,
                                     hidden,
                                     normal_cell=normal_cell,
                                     reduction_cell=reduction_cell,
                                     hparams=hparams,
                                     true_cell_num=true_cell_num,
                                     start_cell_num=start_cell_num)

    proposal_classifier_features = net
    return proposal_classifier_features
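  # Sanity check of the cell bookkeeping above, assuming the public NASNet-A
  # Large ImageNet config where hparams.num_cells == 18 (an assumption; other
  # configs differ):
  #
  #   total_num_cells = 18 + 2 + 2   # normal + 2 reduction + 2 stem = 22
  #   true_cell_num   = 12 + 2 + 1   # start_cell_num + 2 stems + 1 reduction = 15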
  def restore_from_classification_checkpoint_fn(
      self,
      first_stage_feature_extractor_scope,
      second_stage_feature_extractor_scope):
    """Returns a map of variables to load from a foreign checkpoint.

    Note that this overrides the default implementation in
    faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
    NASNet-A checkpoints.

    Args:
      first_stage_feature_extractor_scope: A scope name for the first stage
        feature extractor.
      second_stage_feature_extractor_scope: A scope name for the second stage
        feature extractor.

    Returns:
      A dict mapping variable names (to load from a checkpoint) to variables in
      the model graph.
    """
    # Note that the NAS checkpoint only contains the moving average version of
    # the variables so we need to generate an appropriate dictionary mapping.
    variables_to_restore = {}
    for variable in tf.global_variables():
      if variable.op.name.startswith(
          first_stage_feature_extractor_scope):
        var_name = variable.op.name.replace(
            first_stage_feature_extractor_scope + '/', '')
        var_name += '/ExponentialMovingAverage'
        variables_to_restore[var_name] = variable
      if variable.op.name.startswith(
          second_stage_feature_extractor_scope):
        var_name = variable.op.name.replace(
            second_stage_feature_extractor_scope + '/', '')
        var_name += '/ExponentialMovingAverage'
        variables_to_restore[var_name] = variable
    return variables_to_restore
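The checkpoint mapping above is pure string manipulation, so it can be checked without building a graph. A minimal sketch with hypothetical scope and variable names:

# Hypothetical variable names as they might appear in a detection graph.
graph_var_names = [
    'FirstStageFeatureExtractor/cell_0/conv/weights',
    'SecondStageFeatureExtractor/cell_12/conv/weights',
]
variables_to_restore = {}
for name in graph_var_names:
  for scope in ('FirstStageFeatureExtractor', 'SecondStageFeatureExtractor'):
    if name.startswith(scope):
      ckpt_name = name.replace(scope + '/', '') + '/ExponentialMovingAverage'
      variables_to_restore[ckpt_name] = name

# Keys now match the moving-average names stored in the NAS checkpoint, e.g.
# 'cell_0/conv/weights/ExponentialMovingAverage'.
assert 'cell_0/conv/weights/ExponentialMovingAverage' in variables_to_restore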
research/object_detection/models/faster_rcnn_nas_feature_extractor_test.py
0 → 100644
View file @
74a03640
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for models.faster_rcnn_nas_feature_extractor."""
import tensorflow as tf

from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas


class FasterRcnnNASFeatureExtractorTest(tf.test.TestCase):

  def _build_feature_extractor(self, first_stage_features_stride):
    return frcnn_nas.FasterRCNNNASFeatureExtractor(
        is_training=False,
        first_stage_features_stride=first_stage_features_stride,
        batch_norm_trainable=False,
        reuse_weights=None,
        weight_decay=0.0)

  def test_extract_proposal_features_returns_expected_size(self):
    feature_extractor = self._build_feature_extractor(
        first_stage_features_stride=16)
    preprocessed_inputs = tf.random_uniform(
        [1, 299, 299, 3], maxval=255, dtype=tf.float32)
    rpn_feature_map = feature_extractor.extract_proposal_features(
        preprocessed_inputs, scope='TestScope')
    features_shape = tf.shape(rpn_feature_map)

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      features_shape_out = sess.run(features_shape)
      self.assertAllEqual(features_shape_out, [1, 19, 19, 4032])

  def test_extract_proposal_features_input_size_224(self):
    feature_extractor = self._build_feature_extractor(
        first_stage_features_stride=16)
    preprocessed_inputs = tf.random_uniform(
        [1, 224, 224, 3], maxval=255, dtype=tf.float32)
    rpn_feature_map = feature_extractor.extract_proposal_features(
        preprocessed_inputs, scope='TestScope')
    features_shape = tf.shape(rpn_feature_map)

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      features_shape_out = sess.run(features_shape)
      self.assertAllEqual(features_shape_out, [1, 14, 14, 4032])

  def test_extract_proposal_features_input_size_112(self):
    feature_extractor = self._build_feature_extractor(
        first_stage_features_stride=16)
    preprocessed_inputs = tf.random_uniform(
        [1, 112, 112, 3], maxval=255, dtype=tf.float32)
    rpn_feature_map = feature_extractor.extract_proposal_features(
        preprocessed_inputs, scope='TestScope')
    features_shape = tf.shape(rpn_feature_map)

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      features_shape_out = sess.run(features_shape)
      self.assertAllEqual(features_shape_out, [1, 7, 7, 4032])

  def test_extract_proposal_features_dies_on_invalid_stride(self):
    with self.assertRaises(ValueError):
      self._build_feature_extractor(first_stage_features_stride=99)

  def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
    feature_extractor = self._build_feature_extractor(
        first_stage_features_stride=16)
    preprocessed_inputs = tf.random_uniform(
        [224, 224, 3], maxval=255, dtype=tf.float32)
    with self.assertRaises(ValueError):
      feature_extractor.extract_proposal_features(
          preprocessed_inputs, scope='TestScope')

  def test_extract_box_classifier_features_returns_expected_size(self):
    feature_extractor = self._build_feature_extractor(
        first_stage_features_stride=16)
    proposal_feature_maps = tf.random_uniform(
        [2, 17, 17, 1088], maxval=255, dtype=tf.float32)
    proposal_classifier_features = (
        feature_extractor.extract_box_classifier_features(
            proposal_feature_maps, scope='TestScope'))
    features_shape = tf.shape(proposal_classifier_features)

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      features_shape_out = sess.run(features_shape)
      self.assertAllEqual(features_shape_out, [2, 9, 9, 4032])


if __name__ == '__main__':
  tf.test.main()
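The expected spatial sizes in the proposal-feature tests follow from the stride-16 feature stride with SAME padding: each dimension shrinks to the ceiling of input_size / 16. A quick check in plain Python:

import math

def rpn_spatial_size(input_size, stride=16):
  # SAME padding at total stride 16 gives ceil(size / stride).
  return int(math.ceil(input_size / float(stride)))

assert rpn_spatial_size(299) == 19
assert rpn_spatial_size(224) == 14
assert rpn_spatial_size(112) == 7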
research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py
View file @
74a03640
...
...
@@ -42,6 +42,7 @@ class FasterRCNNResnetV1FeatureExtractor(
                resnet_model,
                is_training,
                first_stage_features_stride,
+               batch_norm_trainable=False,
                reuse_weights=None,
                weight_decay=0.0):
   """Constructor.
...
@@ -51,6 +52,7 @@ class FasterRCNNResnetV1FeatureExtractor(
       resnet_model: Definition of the Resnet V1 model.
       is_training: See base class.
       first_stage_features_stride: See base class.
+      batch_norm_trainable: See base class.
       reuse_weights: See base class.
       weight_decay: See base class.
...
@@ -62,7 +64,8 @@ class FasterRCNNResnetV1FeatureExtractor(
     self._architecture = architecture
     self._resnet_model = resnet_model
     super(FasterRCNNResnetV1FeatureExtractor, self).__init__(
-        is_training, first_stage_features_stride, reuse_weights, weight_decay)
+        is_training, first_stage_features_stride, batch_norm_trainable,
+        reuse_weights, weight_decay)

   def preprocess(self, resized_inputs):
     """Faster R-CNN Resnet V1 preprocessing.
...
@@ -119,7 +122,7 @@ class FasterRCNNResnetV1FeatureExtractor(
           _, activations = self._resnet_model(
               preprocessed_inputs,
               num_classes=None,
-              is_training=False,
+              is_training=self._train_batch_norm,
               global_pool=False,
               output_stride=self._first_stage_features_stride,
               spatial_squeeze=False,
...
@@ -148,7 +151,8 @@ class FasterRCNNResnetV1FeatureExtractor(
               batch_norm_epsilon=1e-5,
               batch_norm_scale=True,
               weight_decay=self._weight_decay)):
-        with slim.arg_scope([slim.batch_norm], is_training=False):
+        with slim.arg_scope([slim.batch_norm],
+                            is_training=self._train_batch_norm):
           blocks = [
               resnet_utils.Block('block4', resnet_v1.bottleneck, [{
                   'depth': 2048,
...
@@ -167,6 +171,7 @@ class FasterRCNNResnet50FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
   def __init__(self,
                is_training,
                first_stage_features_stride,
+               batch_norm_trainable=False,
                reuse_weights=None,
                weight_decay=0.0):
     """Constructor.
...
@@ -174,6 +179,7 @@ class FasterRCNNResnet50FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
     Args:
       is_training: See base class.
       first_stage_features_stride: See base class.
+      batch_norm_trainable: See base class.
       reuse_weights: See base class.
       weight_decay: See base class.
...
@@ -183,7 +189,8 @@ class FasterRCNNResnet50FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
     """
     super(FasterRCNNResnet50FeatureExtractor, self).__init__(
         'resnet_v1_50', resnet_v1.resnet_v1_50, is_training,
-        first_stage_features_stride, reuse_weights, weight_decay)
+        first_stage_features_stride, batch_norm_trainable,
+        reuse_weights, weight_decay)


 class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
...
@@ -192,6 +199,7 @@ class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
   def __init__(self,
                is_training,
                first_stage_features_stride,
+               batch_norm_trainable=False,
                reuse_weights=None,
                weight_decay=0.0):
     """Constructor.
...
@@ -199,6 +207,7 @@ class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
     Args:
       is_training: See base class.
       first_stage_features_stride: See base class.
+      batch_norm_trainable: See base class.
       reuse_weights: See base class.
       weight_decay: See base class.
...
@@ -208,7 +217,8 @@ class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
     """
     super(FasterRCNNResnet101FeatureExtractor, self).__init__(
         'resnet_v1_101', resnet_v1.resnet_v1_101, is_training,
-        first_stage_features_stride, reuse_weights, weight_decay)
+        first_stage_features_stride, batch_norm_trainable,
+        reuse_weights, weight_decay)


 class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
...
@@ -217,6 +227,7 @@ class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
   def __init__(self,
                is_training,
                first_stage_features_stride,
+               batch_norm_trainable=False,
                reuse_weights=None,
                weight_decay=0.0):
     """Constructor.
...
@@ -224,6 +235,7 @@ class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
     Args:
       is_training: See base class.
       first_stage_features_stride: See base class.
+      batch_norm_trainable: See base class.
       reuse_weights: See base class.
       weight_decay: See base class.
...
@@ -233,4 +245,5 @@ class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
     """
     super(FasterRCNNResnet152FeatureExtractor, self).__init__(
         'resnet_v1_152', resnet_v1.resnet_v1_152, is_training,
-        first_stage_features_stride, reuse_weights, weight_decay)
+        first_stage_features_stride, batch_norm_trainable,
+        reuse_weights, weight_decay)
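With batch_norm_trainable plumbed through, callers can freeze batch norm statistics when fine-tuning with small batches. A minimal construction sketch (argument values here are illustrative, not prescribed by the commit):

from object_detection.models import faster_rcnn_resnet_v1_feature_extractor \
    as frcnn_resnet

feature_extractor = frcnn_resnet.FasterRCNNResnet101FeatureExtractor(
    is_training=True,
    first_stage_features_stride=16,
    batch_norm_trainable=False,  # keep pretrained moving averages fixed
    reuse_weights=None,
    weight_decay=0.0)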
research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py
View file @
74a03640
...
...
@@ -37,6 +37,7 @@ class FasterRcnnResnetV1FeatureExtractorTest(tf.test.TestCase):
     return feature_extractor_map[architecture](
         is_training=False,
         first_stage_features_stride=first_stage_features_stride,
+        batch_norm_trainable=False,
         reuse_weights=None,
         weight_decay=0.0)
...
...
research/object_detection/models/feature_map_generators.py
View file @
74a03640
...
...
@@ -25,7 +25,6 @@ of final feature maps.
 """
 import collections
 import tensorflow as tf
-from object_detection.utils import ops

 slim = tf.contrib.slim
...
@@ -59,12 +58,13 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
   based on the spatial shape and depth configuration. Note that the current
   implementation only supports generating new layers using convolution of
   stride 2 resulting in a spatial resolution reduction by a factor of 2.
+  By default convolution kernel size is set to 3, and it can be customized
+  by the caller.

   An example of the configuration for Inception V3:
   {
     'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
-    'layer_depth': [-1, -1, -1, 512, 256, 128],
-    'anchor_strides': [16, 32, 64, -1, -1, -1]
+    'layer_depth': [-1, -1, -1, 512, 256, 128]
   }
...
@@ -72,14 +72,12 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
       layouts in the following format (Inception V2/V3 respectively):
       {
         'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
-        'layer_depth': [-1, -1, -1, 512, 256, 128],
-        'anchor_strides': [16, 32, 64, -1, -1, -1]
+        'layer_depth': [-1, -1, -1, 512, 256, 128]
       }
       or
       {
         'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', '', ''],
-        'layer_depth': [-1, -1, -1, 512, 256, 128],
-        'anchor_strides': [16, 32, 64, -1, -1, -1]
+        'layer_depth': [-1, -1, -1, 512, 256, 128]
       }
       If 'from_layer' is specified, the specified feature map is directly used
       as a box predictor layer, and the layer_depth is directly inferred from
       the
...
@@ -90,14 +88,11 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
       Note that the current implementation only supports generating new layers
       using convolutions of stride 2 (resulting in a spatial resolution
       reduction by a factor of 2), and will be extended to a more flexible
-      design. Finally, the optional 'anchor_strides' can be used to specify
-      the anchor stride at each layer where 'from_layer' is specified. Our
-      convention is to set 'anchor_strides' to -1 at the positions where
-      'from_layer' is an empty string, and anchor strides at these layers will
-      be inferred from the previous layer's anchor strides and the current
-      layer's stride length. In the case where 'anchor_strides' is not
-      specified, the anchor strides will default to the image width and height
-      divided by the number of anchors.
+      design. Convolution kernel size is set to 3 by default, and can be
+      customized by the 'conv_kernel_size' parameter (similarly,
+      'conv_kernel_size' should be set to -1 if 'from_layer' is specified).
+      The created convolution operation will be a normal 2D convolution by
+      default, and a depthwise convolution followed by 1x1 convolution if
+      'use_depthwise' is set to True.
     depth_multiplier: Depth multiplier for convolutional layers.
     min_depth: Minimum depth for convolutional layers.
     insert_1x1_conv: A boolean indicating whether an additional 1x1 convolution
...
@@ -120,14 +115,14 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
   feature_map_keys = []
   feature_maps = []
   base_from_layer = ''
-  feature_map_strides = None
   use_depthwise = False
-  if 'anchor_strides' in feature_map_layout:
-    feature_map_strides = (feature_map_layout['anchor_strides'])
   if 'use_depthwise' in feature_map_layout:
     use_depthwise = feature_map_layout['use_depthwise']
-  for index, (from_layer, layer_depth) in enumerate(zip(
-      feature_map_layout['from_layer'], feature_map_layout['layer_depth'])):
+  for index, from_layer in enumerate(feature_map_layout['from_layer']):
+    layer_depth = feature_map_layout['layer_depth'][index]
+    conv_kernel_size = 3
+    if 'conv_kernel_size' in feature_map_layout:
+      conv_kernel_size = feature_map_layout['conv_kernel_size'][index]
     if from_layer:
       feature_map = image_features[from_layer]
       base_from_layer = from_layer
...
@@ -145,12 +140,13 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
           stride=1,
           scope=layer_name)
       stride = 2
-      layer_name = '{}_2_Conv2d_{}_3x3_s2_{}'.format(
-          base_from_layer, index, depth_fn(layer_depth))
+      layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
+          base_from_layer, index, conv_kernel_size, conv_kernel_size,
+          depth_fn(layer_depth))
       if use_depthwise:
         feature_map = slim.separable_conv2d(
-            ops.pad_to_multiple(intermediate_layer, stride),
-            None, [3, 3],
+            intermediate_layer,
+            None, [conv_kernel_size, conv_kernel_size],
             depth_multiplier=1,
             padding='SAME',
             stride=stride,
...
@@ -163,16 +159,11 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
             scope=layer_name)
       else:
         feature_map = slim.conv2d(
-            ops.pad_to_multiple(intermediate_layer, stride),
-            depth_fn(layer_depth), [3, 3],
+            intermediate_layer,
+            depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size],
             padding='SAME',
             stride=stride,
             scope=layer_name)
-      if (index > 0 and feature_map_strides and
-          feature_map_strides[index - 1] > 0):
-        feature_map_strides[index] = (
-            stride * feature_map_strides[index - 1])
     feature_map_keys.append(layer_name)
     feature_maps.append(feature_map)
   return collections.OrderedDict(
...
...
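The new naming scheme bakes the kernel size into each generated layer's name. A quick illustration in plain Python, with values chosen to match the embedded SSD MobileNet V1 test below:

base_from_layer = 'Conv2d_13_pointwise'
index, conv_kernel_size, depth = 4, 2, 256

layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
    base_from_layer, index, conv_kernel_size, conv_kernel_size, depth)
assert layer_name == 'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256'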
research/object_detection/models/feature_map_generators_test.py
View file @
74a03640
...
...
@@ -33,8 +33,14 @@ INCEPTION_V3_LAYOUT = {
     'aspect_ratios': [1.0, 2.0, 1.0 / 2, 3.0, 1.0 / 3]
 }

+EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
+    'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''],
+    'layer_depth': [-1, -1, 512, 256, 256],
+    'conv_kernel_size': [-1, -1, 3, 3, 2],
+}
+
-# TODO: add tests with different anchor strides.
+# TODO(rathodv): add tests with different anchor strides.


 class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):

   def test_get_expected_feature_map_shapes_with_inception_v2(self):
...
@@ -96,6 +102,37 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
       out_feature_map_shapes = dict(
           (key, value.shape) for key, value in out_feature_maps.items())
       self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)

+  def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
+      self):
+    image_features = {
+        'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512],
+                                                 dtype=tf.float32),
+        'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024],
+                                                 dtype=tf.float32),
+    }
+    feature_maps = feature_map_generators.multi_resolution_feature_maps(
+        feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
+        depth_multiplier=1,
+        min_depth=32,
+        insert_1x1_conv=True,
+        image_features=image_features)
+
+    expected_feature_map_shapes = {
+        'Conv2d_11_pointwise': (4, 16, 16, 512),
+        'Conv2d_13_pointwise': (4, 8, 8, 1024),
+        'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
+        'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
+        'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256)}
+
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      out_feature_maps = sess.run(feature_maps)
+      out_feature_map_shapes = dict(
+          (key, value.shape) for key, value in out_feature_maps.items())
+      self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)


 class GetDepthFunctionTest(tf.test.TestCase):
...
...
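The expected shapes in the new embedded-SSD test follow from three stride-2 convolutions applied to the 8x8 'Conv2d_13_pointwise' map; with SAME padding each step halves the spatial size, rounding up:

import math

size = 8  # spatial size of 'Conv2d_13_pointwise'
sizes = []
for _ in range(3):  # the three generated stride-2 layers
  size = int(math.ceil(size / 2.0))
  sizes.append(size)
assert sizes == [4, 2, 1]  # the 4x4, 2x2 and 1x1 maps in the test above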
research/object_detection/models/ssd_feature_extractor_test.py
View file @
74a03640
...
...
@@ -46,34 +46,32 @@ class SsdFeatureExtractorTestBase(object):
       self.assertAllEqual(shape_out, exp_shape_out)

   @abstractmethod
-  def _create_feature_extractor(self, depth_multiplier):
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple):
     """Constructs a new feature extractor.

     Args:
       depth_multiplier: float depth multiplier for feature extractor
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
     Returns:
       an ssd_meta_arch.SSDFeatureExtractor object.
     """
     pass

   def check_extract_features_returns_correct_shape(
-      self, image_height, image_width, depth_multiplier,
+      self, image_height, image_width, depth_multiplier, pad_to_multiple,
       expected_feature_map_shapes_out):
-    feature_extractor = self._create_feature_extractor(depth_multiplier)
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
     preprocessed_inputs = tf.random_uniform(
         [4, image_height, image_width, 3], dtype=tf.float32)
     self._validate_features_shape(
         feature_extractor, preprocessed_inputs,
         expected_feature_map_shapes_out)

   def check_extract_features_raises_error_with_invalid_image_size(
-      self, image_height, image_width, depth_multiplier):
-    feature_extractor = self._create_feature_extractor(depth_multiplier)
+      self, image_height, image_width, depth_multiplier, pad_to_multiple):
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
     preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
     feature_maps = feature_extractor.extract_features(preprocessed_inputs)
     test_preprocessed_image = np.random.rand(4, image_height, image_width, 3)
...
@@ -83,12 +81,12 @@ class SsdFeatureExtractorTestBase(object):
       sess.run(feature_maps,
                feed_dict={preprocessed_inputs: test_preprocessed_image})

-  def check_feature_extractor_variables_under_scope(
-      self, depth_multiplier, scope_name):
+  def check_feature_extractor_variables_under_scope(
+      self, depth_multiplier, pad_to_multiple, scope_name):
     g = tf.Graph()
     with g.as_default():
-      feature_extractor = self._create_feature_extractor(depth_multiplier)
+      feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                         pad_to_multiple)
       preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
       feature_extractor.extract_features(preprocessed_inputs)
       variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
...
...
research/object_detection/models/ssd_inception_v2_feature_extractor.py
View file @
74a03640
...
...
@@ -18,6 +18,7 @@ import tensorflow as tf
 from object_detection.meta_architectures import ssd_meta_arch
 from object_detection.models import feature_map_generators
+from object_detection.utils import ops
 from nets import inception_v2

 slim = tf.contrib.slim
...
@@ -27,20 +28,31 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
   """SSD Feature Extractor using InceptionV2 features."""

   def __init__(self,
+               is_training,
                depth_multiplier,
                min_depth,
+               pad_to_multiple,
                conv_hyperparams,
+               batch_norm_trainable=True,
                reuse_weights=None):
     """InceptionV2 Feature Extractor for SSD Models.

     Args:
+      is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
       min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
       conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
+      batch_norm_trainable: Whether to update batch norm parameters during
+        training or not. When training with a small batch size
+        (e.g. 1), it is desirable to disable batch norm update and use
+        pretrained batch norm params.
       reuse_weights: Whether to reuse variables. Default is None.
     """
     super(SSDInceptionV2FeatureExtractor, self).__init__(
-        depth_multiplier, min_depth, conv_hyperparams, reuse_weights)
+        is_training, depth_multiplier, min_depth, pad_to_multiple,
+        conv_hyperparams, batch_norm_trainable, reuse_weights)

   def preprocess(self, resized_inputs):
     """SSD preprocessing.
...
@@ -84,7 +96,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
       with tf.variable_scope('InceptionV2',
                              reuse=self._reuse_weights) as scope:
         _, image_features = inception_v2.inception_v2_base(
-            preprocessed_inputs,
+            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
             final_endpoint='Mixed_5c',
             min_depth=self._min_depth,
             depth_multiplier=self._depth_multiplier,
...
...
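For intuition, ops.pad_to_multiple zero-pads the height and width of an NHWC batch up to the next multiple of the given value before the base network runs. A minimal sketch of equivalent padding logic for statically-shaped inputs (my own illustration under that assumption, not the library's implementation, which also handles dynamic shapes):

import math
import tensorflow as tf  # assumes a TF 1.x environment

def pad_to_multiple_sketch(tensor, multiple):
  """Zero-pads height/width of a statically-shaped NHWC tensor."""
  _, height, width, _ = tensor.get_shape().as_list()
  padded_h = int(math.ceil(height / float(multiple))) * multiple
  padded_w = int(math.ceil(width / float(multiple))) * multiple
  return tf.pad(tensor, [[0, 0],
                         [0, padded_h - height],
                         [0, padded_w - width],
                         [0, 0]])

images = tf.zeros([4, 299, 299, 3])
padded = pad_to_multiple_sketch(images, 32)
print(padded.get_shape().as_list())  # [4, 320, 320, 3]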
research/object_detection/models/ssd_inception_v2_feature_extractor_test.py
View file @
74a03640
...
...
@@ -22,73 +22,101 @@ from object_detection.models import ssd_inception_v2_feature_extractor

 class SsdInceptionV2FeatureExtractorTest(
     ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):

-  def _create_feature_extractor(self, depth_multiplier):
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+                                is_training=True, batch_norm_trainable=True):
     """Constructs a SsdInceptionV2FeatureExtractor.

     Args:
       depth_multiplier: float depth multiplier for feature extractor
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      is_training: whether the network is in training mode.
+      batch_norm_trainable: Whether to update batch norm parameters during
+        training or not

     Returns:
       an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor.
     """
     min_depth = 32
     conv_hyperparams = {}
     return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
-        depth_multiplier, min_depth, conv_hyperparams)
+        is_training, depth_multiplier, min_depth, pad_to_multiple,
+        conv_hyperparams, batch_norm_trainable)

   def test_extract_features_returns_correct_shapes_128(self):
     image_height = 128
     image_width = 128
     depth_multiplier = 1.0
+    pad_to_multiple = 1
     expected_feature_map_shape = [(4, 8, 8, 576), (4, 4, 4, 1024),
                                   (4, 2, 2, 512), (4, 1, 1, 256),
                                   (4, 1, 1, 256), (4, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
-        image_height, image_width, depth_multiplier,
-        expected_feature_map_shape)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)

   def test_extract_features_returns_correct_shapes_299(self):
     image_height = 299
     image_width = 299
     depth_multiplier = 1.0
+    pad_to_multiple = 1
     expected_feature_map_shape = [(4, 19, 19, 576), (4, 10, 10, 1024),
                                   (4, 5, 5, 512), (4, 3, 3, 256),
                                   (4, 2, 2, 256), (4, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
-        image_height, image_width, depth_multiplier,
-        expected_feature_map_shape)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)

   def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
     image_height = 299
     image_width = 299
     depth_multiplier = 0.5**12
+    pad_to_multiple = 1
     expected_feature_map_shape = [(4, 19, 19, 128), (4, 10, 10, 128),
                                   (4, 5, 5, 32), (4, 3, 3, 32),
                                   (4, 2, 2, 32), (4, 1, 1, 32)]
     self.check_extract_features_returns_correct_shape(
-        image_height, image_width, depth_multiplier,
-        expected_feature_map_shape)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)

+  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
+    image_height = 299
+    image_width = 299
+    depth_multiplier = 1.0
+    pad_to_multiple = 32
+    expected_feature_map_shape = [(4, 20, 20, 576), (4, 10, 10, 1024),
+                                  (4, 5, 5, 512), (4, 3, 3, 256),
+                                  (4, 2, 2, 256), (4, 1, 1, 128)]
+    self.check_extract_features_returns_correct_shape(
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)

   def test_extract_features_raises_error_with_invalid_image_size(self):
     image_height = 32
     image_width = 32
     depth_multiplier = 1.0
+    pad_to_multiple = 1
     self.check_extract_features_raises_error_with_invalid_image_size(
-        image_height, image_width, depth_multiplier)
+        image_height, image_width, depth_multiplier, pad_to_multiple)

   def test_preprocess_returns_correct_value_range(self):
     image_height = 128
     image_width = 128
     depth_multiplier = 1
+    pad_to_multiple = 1
     test_image = np.random.rand(4, image_height, image_width, 3)
-    feature_extractor = self._create_feature_extractor(depth_multiplier)
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
     preprocessed_image = feature_extractor.preprocess(test_image)
     self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))

   def test_variables_only_created_in_scope(self):
     depth_multiplier = 1
+    pad_to_multiple = 1
     scope_name = 'InceptionV2'
-    self.check_feature_extractor_variables_under_scope(
-        depth_multiplier, scope_name)
+    self.check_feature_extractor_variables_under_scope(
+        depth_multiplier, pad_to_multiple, scope_name)


 if __name__ == '__main__':
...
...
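In the new pad_to_multiple test, only the first feature map changes shape: 299 is padded up to 320, and 320 / 16 = 20, so the first map becomes (4, 20, 20, 576) instead of (4, 19, 19, 576) while the deeper maps are unaffected. In plain Python:

import math

image_size, multiple, stride = 299, 32, 16
padded = int(math.ceil(image_size / float(multiple))) * multiple
assert padded == 320
assert padded // stride == 20  # first feature map becomes 20x20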
research/object_detection/models/ssd_inception_v3_feature_extractor.py
0 → 100644
View file @
74a03640
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for InceptionV3 features."""
import tensorflow as tf

from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import ops
from nets import inception_v3

slim = tf.contrib.slim


class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
  """SSD Feature Extractor using InceptionV3 features."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               batch_norm_trainable=True,
               reuse_weights=None):
    """InceptionV3 Feature Extractor for SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      batch_norm_trainable: Whether to update batch norm parameters during
        training or not. When training with a small batch size
        (e.g. 1), it is desirable to disable batch norm update and use
        pretrained batch norm params.
      reuse_weights: Whether to reuse variables. Default is None.
    """
    super(SSDInceptionV3FeatureExtractor, self).__init__(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, batch_norm_trainable, reuse_weights)

  def preprocess(self, resized_inputs):
    """SSD preprocessing.

    Maps pixel values to the range [-1, 1].

    Args:
      resized_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
    """
    return (2.0 / 255.0) * resized_inputs - 1.0

  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    feature_map_layout = {
        'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
        'layer_depth': [-1, -1, -1, 512, 256, 128],
    }

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(self._conv_hyperparams):
        with tf.variable_scope('InceptionV3',
                               reuse=self._reuse_weights) as scope:
          _, image_features = inception_v3.inception_v3_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Mixed_7c',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
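A minimal construction sketch for the new extractor (the arg_scope here is a stand-in for the hyperparameters normally built from the model config, and all values are illustrative):

import tensorflow as tf  # assumes a TF 1.x environment

slim = tf.contrib.slim

# Stand-in conv hyperparameters; real configs come from the model proto.
with slim.arg_scope([slim.conv2d], padding='SAME') as conv_hyperparams:
  pass

feature_extractor = SSDInceptionV3FeatureExtractor(
    is_training=False,
    depth_multiplier=1.0,
    min_depth=32,
    pad_to_multiple=1,
    conv_hyperparams=conv_hyperparams,
    batch_norm_trainable=False)

images = tf.random_uniform([4, 299, 299, 3], maxval=255, dtype=tf.float32)
feature_maps = feature_extractor.extract_features(
    feature_extractor.preprocess(images))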