Commit c839310b authored by Vivek Rathod

feature extractor and model builder update.

parent ff88581a
@@ -24,9 +24,12 @@ py_library(
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/object_detection/meta_architectures:rfcn_meta_arch",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/models:embedded_ssd_mobilenet_v1_feature_extractor",
"//tensorflow_models/object_detection/models:faster_rcnn_inception_resnet_v2_feature_extractor",
"//tensorflow_models/object_detection/models:faster_rcnn_inception_v2_feature_extractor",
"//tensorflow_models/object_detection/models:faster_rcnn_resnet_v1_feature_extractor",
"//tensorflow_models/object_detection/models:ssd_inception_v2_feature_extractor",
"//tensorflow_models/object_detection/models:ssd_inception_v3_feature_extractor",
"//tensorflow_models/object_detection/models:ssd_mobilenet_v1_feature_extractor",
"//tensorflow_models/object_detection/protos:model_py_pb2",
],
@@ -40,7 +43,11 @@ py_test(
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/models:faster_rcnn_inception_resnet_v2_feature_extractor",
"//tensorflow_models/object_detection/models:faster_rcnn_inception_v2_feature_extractor",
"//tensorflow_models/object_detection/models:faster_rcnn_resnet_v1_feature_extractor",
"//tensorflow_models/object_detection/models:ssd_inception_v2_feature_extractor",
"//tensorflow_models/object_detection/models:ssd_inception_v3_feature_extractor",
"//tensorflow_models/object_detection/models:ssd_mobilenet_v1_feature_extractor",
"//tensorflow_models/object_detection/protos:model_py_pb2",
],
...
@@ -28,27 +28,35 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.meta_architectures import rfcn_meta_arch
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
from object_detection.protos import model_pb2
# A map of names to SSD feature extractors.
SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
'ssd_inception_v2': SSDInceptionV2FeatureExtractor,
'ssd_inception_v3': SSDInceptionV3FeatureExtractor,
'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor,
'embedded_ssd_mobilenet_v1': EmbeddedSSDMobileNetV1FeatureExtractor,
}
# A map of names to Faster R-CNN feature extractors.
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
'faster_rcnn_inception_resnet_v2':
frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor,
'faster_rcnn_inception_v2':
frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor,
'faster_rcnn_resnet50':
frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
'faster_rcnn_resnet101':
frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
'faster_rcnn_resnet152':
frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
'faster_rcnn_inception_resnet_v2':
frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor
}
@@ -94,6 +102,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
feature_type = feature_extractor_config.type
depth_multiplier = feature_extractor_config.depth_multiplier
min_depth = feature_extractor_config.min_depth
pad_to_multiple = feature_extractor_config.pad_to_multiple
batch_norm_trainable = feature_extractor_config.batch_norm_trainable
conv_hyperparams = hyperparams_builder.build(
feature_extractor_config.conv_hyperparams, is_training)
@@ -101,8 +111,9 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
return feature_extractor_class(depth_multiplier, min_depth, conv_hyperparams,
reuse_weights)
return feature_extractor_class(is_training, depth_multiplier, min_depth,
pad_to_multiple, conv_hyperparams,
batch_norm_trainable, reuse_weights)
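For reference, the SSD feature extractor constructors now take is_training, pad_to_multiple, and batch_norm_trainable in addition to the earlier arguments. A minimal sketch of the new call order (values are illustrative, not from this commit; conv_hyperparams is the slim arg_scope produced by hyperparams_builder.build above):

    extractor = SSDMobileNetV1FeatureExtractor(
        True,   # is_training: new leading argument
        1.0,    # depth_multiplier
        16,     # min_depth
        1,      # pad_to_multiple: new; zero-pads input height/width to this multiple
        conv_hyperparams,
        True,   # batch_norm_trainable: new; whether batch norm params update in training
        None)   # reuse_weights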
def _build_ssd_model(ssd_config, is_training):
@@ -180,6 +191,7 @@ def _build_faster_rcnn_feature_extractor(
feature_type = feature_extractor_config.type
first_stage_features_stride = (
feature_extractor_config.first_stage_features_stride)
batch_norm_trainable = feature_extractor_config.batch_norm_trainable
if feature_type not in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP:
raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format(
@@ -187,7 +199,8 @@ def _build_faster_rcnn_feature_extractor(
feature_extractor_class = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[
feature_type]
return feature_extractor_class(
is_training, first_stage_features_stride, reuse_weights)
return feature_extractor_class(
is_training, first_stage_features_stride,
batch_norm_trainable, reuse_weights)
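The Faster R-CNN extractors gain the same batch_norm_trainable flag, inserted before reuse_weights and defaulting to False so pretrained batch norm statistics stay frozen. A rough sketch of the updated call (values illustrative, not from this commit):

    extractor = frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor(
        is_training=True,
        first_stage_features_stride=16,
        batch_norm_trainable=False,  # new argument
        reuse_weights=None)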
def _build_faster_rcnn_model(frcnn_config, is_training):
@@ -248,8 +261,13 @@ def _build_faster_rcnn_model(frcnn_config, is_training):
) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
second_stage_localization_loss_weight = (
frcnn_config.second_stage_localization_loss_weight)
second_stage_classification_loss = (
losses_builder.build_faster_rcnn_classification_loss(
frcnn_config.second_stage_classification_loss))
second_stage_classification_loss_weight = (
frcnn_config.second_stage_classification_loss_weight)
second_stage_mask_prediction_loss_weight = (
frcnn_config.second_stage_mask_prediction_loss_weight)
hard_example_miner = None
if frcnn_config.HasField('hard_example_miner'):
@@ -286,6 +304,8 @@ def _build_faster_rcnn_model(frcnn_config, is_training):
'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
'second_stage_localization_loss_weight':
second_stage_localization_loss_weight,
'second_stage_classification_loss':
second_stage_classification_loss,
'second_stage_classification_loss_weight':
second_stage_classification_loss_weight,
'hard_example_miner': hard_example_miner}
@@ -300,4 +320,6 @@ def _build_faster_rcnn_model(frcnn_config, is_training):
maxpool_kernel_size=maxpool_kernel_size,
maxpool_stride=maxpool_stride,
second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
second_stage_mask_prediction_loss_weight=(
second_stage_mask_prediction_loss_weight),
**common_kwargs)
@@ -23,8 +23,10 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.meta_architectures import rfcn_meta_arch
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
from object_detection.protos import model_pb2
@@ -123,6 +125,77 @@ class ModelBuilderTest(tf.test.TestCase):
self.assertIsInstance(model._feature_extractor,
SSDInceptionV2FeatureExtractor)
def test_create_ssd_inception_v3_model_from_config(self):
model_text_proto = """
ssd {
feature_extractor {
type: 'ssd_inception_v3'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = self.create_model(model_proto)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDInceptionV3FeatureExtractor)
def test_create_ssd_mobilenet_v1_model_from_config(self):
model_text_proto = """
ssd {
@@ -138,6 +211,7 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
@@ -193,6 +267,7 @@ class ModelBuilderTest(tf.test.TestCase):
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDMobileNetV1FeatureExtractor)
self.assertTrue(model._feature_extractor._batch_norm_trainable)
def test_create_faster_rcnn_resnet_v1_models_from_config(self):
model_text_proto = """
@@ -255,12 +330,88 @@ class ModelBuilderTest(tf.test.TestCase):
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items():
for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.iteritems():
model_proto.faster_rcnn.feature_extractor.type = extractor_type
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
self.assertIsInstance(model._feature_extractor, extractor_class)
def test_create_faster_rcnn_resnet101_with_mask_prediction_enabled(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
predict_instance_masks: true
}
}
second_stage_mask_prediction_loss_weight: 3.0
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = model_builder.build(model_proto, is_training=True)
self.assertAlmostEqual(model._second_stage_mask_loss_weight, 3.0)
def test_create_faster_rcnn_inception_resnet_v2_model_from_config(self):
model_text_proto = """
faster_rcnn {
@@ -328,6 +479,72 @@ class ModelBuilderTest(tf.test.TestCase):
model._feature_extractor,
frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor)
def test_create_faster_rcnn_inception_v2_model_from_config(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_inception_v2'
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
self.assertIsInstance(model._feature_extractor,
frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor)
def test_create_faster_rcnn_model_from_config_with_example_miner(self):
model_text_proto = """
faster_rcnn {
@@ -445,7 +662,7 @@ class ModelBuilderTest(tf.test.TestCase):
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items():
for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.iteritems():
model_proto.faster_rcnn.feature_extractor.type = extractor_type
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, rfcn_meta_arch.RFCNMetaArch)
...
@@ -15,7 +15,6 @@ py_library(
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/utils:ops",
],
)
@@ -49,10 +48,25 @@ py_library(
":feature_map_generators",
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/slim:inception_v2",
],
)
py_library(
name = "ssd_inception_v3_feature_extractor",
srcs = [
"ssd_inception_v3_feature_extractor.py",
],
deps = [
":feature_map_generators",
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/slim:inception_v3",
],
)
py_library(
name = "ssd_mobilenet_v1_feature_extractor",
srcs = ["ssd_mobilenet_v1_feature_extractor.py"],
@@ -60,6 +74,19 @@ py_library(
":feature_map_generators",
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/slim:mobilenet_v1",
],
)
py_library(
name = "embedded_ssd_mobilenet_v1_feature_extractor",
srcs = ["embedded_ssd_mobilenet_v1_feature_extractor.py"],
deps = [
":feature_map_generators",
":ssd_mobilenet_v1_feature_extractor",
"//tensorflow",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/slim:mobilenet_v1", "//tensorflow_models/slim:mobilenet_v1",
], ],
) )
@@ -76,6 +103,18 @@ py_test(
],
)
py_test(
name = "ssd_inception_v3_feature_extractor_test",
srcs = [
"ssd_inception_v3_feature_extractor_test.py",
],
deps = [
":ssd_feature_extractor_test",
":ssd_inception_v3_feature_extractor",
"//tensorflow",
],
)
py_test(
name = "ssd_mobilenet_v1_feature_extractor_test",
srcs = ["ssd_mobilenet_v1_feature_extractor_test.py"],
@@ -86,6 +125,16 @@ py_test(
],
)
py_test(
name = "embedded_ssd_mobilenet_v1_feature_extractor_test",
srcs = ["embedded_ssd_mobilenet_v1_feature_extractor_test.py"],
deps = [
":embedded_ssd_mobilenet_v1_feature_extractor",
":ssd_feature_extractor_test",
"//tensorflow",
],
)
py_library(
name = "faster_rcnn_inception_resnet_v2_feature_extractor",
srcs = [
@@ -109,6 +158,29 @@ py_test(
],
)
py_library(
name = "faster_rcnn_inception_v2_feature_extractor",
srcs = [
"faster_rcnn_inception_v2_feature_extractor.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:inception_v2",
],
)
py_test(
name = "faster_rcnn_inception_v2_feature_extractor_test",
srcs = [
"faster_rcnn_inception_v2_feature_extractor_test.py",
],
deps = [
":faster_rcnn_inception_v2_feature_extractor",
"//tensorflow",
],
)
py_library(
name = "faster_rcnn_resnet_v1_feature_extractor",
srcs = [
...
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Embedded-friendly SSDFeatureExtractor for MobilenetV1 features."""
import tensorflow as tf
from object_detection.models import feature_map_generators
from object_detection.models import ssd_mobilenet_v1_feature_extractor
from object_detection.utils import ops
from nets import mobilenet_v1
slim = tf.contrib.slim
class EmbeddedSSDMobileNetV1FeatureExtractor(
ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor):
"""Embedded-friendly SSD Feature Extractor using MobilenetV1 features.
This feature extractor is similar to the SSD MobileNetV1 feature extractor,
but it fixes the input resolution to 256x256, reduces the number of feature
maps used for box prediction, and ensures that no convolution kernel is
larger than its input tensor in spatial dimensions.
This feature extractor requires support for the following ops if used on
embedded devices:
- Conv
- DepthwiseConv
- Relu6
All conv/depthwiseconv use SAME padding, and no additional spatial padding is
needed.
"""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. For EmbeddedSSD it must be set to 1.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
Raises:
ValueError: upon invalid `pad_to_multiple` values.
"""
if pad_to_multiple != 1:
raise ValueError('Embedded-specific SSD only supports `pad_to_multiple` '
'of 1.')
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights)
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(
tf.equal(tf.shape(preprocessed_inputs)[1], 256),
tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
['image size must be 256 in both height and width.'])
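# The layout below reuses two MobileNetV1 endpoints as-is and generates three
# extra maps (512, 256, 256 channels) with stride-2 convolutions; the kernel
# sizes (3, 3, 2) never exceed the spatial sizes of their inputs (8x8, 4x4,
# 2x2), which is the embedded-friendly guarantee from the class docstring.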
feature_map_layout = {
'from_layer': [
'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''
],
'layer_depth': [-1, -1, 512, 256, 256],
'conv_kernel_size': [-1, -1, 3, 3, 2],
}
with tf.control_dependencies([shape_assert]):
with slim.arg_scope(self._conv_hyperparams):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
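As a rough usage sketch (not part of the commit; it mirrors the unit test below, including the empty conv_hyperparams dict):

    images = tf.placeholder(tf.float32, [4, 256, 256, 3])
    extractor = EmbeddedSSDMobileNetV1FeatureExtractor(
        is_training=True, depth_multiplier=1.0, min_depth=32,
        pad_to_multiple=1, conv_hyperparams={})
    feature_maps = extractor.extract_features(extractor.preprocess(images))
    # Spatial sizes per map: 16x16, 8x8, 4x4, 2x2, 1x1.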
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for embedded_ssd_mobilenet_v1_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import embedded_ssd_mobilenet_v1_feature_extractor
from object_detection.models import ssd_feature_extractor_test
class EmbeddedSSDMobileNetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True):
"""Constructs a new feature extractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
is_training: whether the network is in training mode.
batch_norm_trainable: whether to update batch norm parameters during
training.
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
conv_hyperparams = {}
return (embedded_ssd_mobilenet_v1_feature_extractor.
EmbeddedSSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable))
def test_extract_features_returns_correct_shapes_256(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024),
(4, 4, 4, 512), (4, 2, 2, 256),
(4, 1, 1, 256)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
image_height = 256
image_width = 256
depth_multiplier = 0.5**12
pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 32), (4, 8, 8, 32), (4, 4, 4, 32),
(4, 2, 2, 32), (4, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1(
self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024),
(4, 4, 4, 512), (4, 2, 2, 256),
(4, 1, 1, 256)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_raises_error_with_pad_to_multiple_not_1(self):
depth_multiplier = 1.0
pad_to_multiple = 2
with self.assertRaises(ValueError):
_ = self._create_feature_extractor(depth_multiplier, pad_to_multiple)
def test_extract_features_raises_error_with_invalid_image_size(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier, pad_to_multiple)
def test_preprocess_returns_correct_value_range(self):
image_height = 256
image_width = 256
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
pad_to_multiple = 1
scope_name = 'MobilenetV1'
self.check_feature_extractor_variables_under_scope(
depth_multiplier, pad_to_multiple, scope_name)
if __name__ == '__main__':
tf.test.main()
@@ -37,6 +37,7 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
def __init__(self,
is_training,
first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None,
weight_decay=0.0):
"""Constructor.
@@ -44,6 +45,7 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
Args:
is_training: See base class.
first_stage_features_stride: See base class.
batch_norm_trainable: See base class.
reuse_weights: See base class.
weight_decay: See base class.
@@ -53,7 +55,8 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
if first_stage_features_stride != 8 and first_stage_features_stride != 16:
raise ValueError('`first_stage_features_stride` must be 8 or 16.')
super(FasterRCNNInceptionResnetV2FeatureExtractor, self).__init__(
is_training, first_stage_features_stride, reuse_weights, weight_decay)
is_training, first_stage_features_stride, batch_norm_trainable,
reuse_weights, weight_decay)
def preprocess(self, resized_inputs):
"""Faster R-CNN with Inception Resnet v2 preprocessing.
@@ -98,7 +101,8 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
weight_decay=self._weight_decay)):
# Forces is_training to False to disable batch norm update.
with slim.arg_scope([slim.batch_norm], is_training=False):
with slim.arg_scope([slim.batch_norm],
is_training=self._train_batch_norm):
with tf.variable_scope('InceptionResnetV2',
reuse=self._reuse_weights) as scope:
rpn_feature_map, _ = (
@@ -129,7 +133,8 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
weight_decay=self._weight_decay)):
# Forces is_training to False to disable batch norm update.
with slim.arg_scope([slim.batch_norm], is_training=False):
with slim.arg_scope([slim.batch_norm],
is_training=self._train_batch_norm):
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1, padding='SAME'):
with tf.variable_scope('Mixed_7a'):
@@ -207,3 +212,4 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
second_stage_feature_extractor_scope + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
@@ -26,6 +26,7 @@ class FasterRcnnInceptionResnetV2FeatureExtractorTest(tf.test.TestCase):
return frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor(
is_training=False,
first_stage_features_stride=first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None,
weight_decay=0.0)
...
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Inception V2 Faster R-CNN implementation.
See "Rethinking the Inception Architecture for Computer Vision"
https://arxiv.org/abs/1512.00567
"""
import tensorflow as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch
from nets import inception_v2
slim = tf.contrib.slim
def _batch_norm_arg_scope(list_ops,
use_batch_norm=True,
batch_norm_decay=0.9997,
batch_norm_epsilon=0.001,
batch_norm_scale=False,
train_batch_norm=False):
"""Slim arg scope for InceptionV2 batch norm."""
if use_batch_norm:
batch_norm_params = {
'is_training': train_batch_norm,
'scale': batch_norm_scale,
'decay': batch_norm_decay,
'epsilon': batch_norm_epsilon
}
normalizer_fn = slim.batch_norm
else:
normalizer_fn = None
batch_norm_params = None
return slim.arg_scope(list_ops,
normalizer_fn=normalizer_fn,
normalizer_params=batch_norm_params)
class FasterRCNNInceptionV2FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
"""Faster R-CNN Inception V2 feature extractor implementation."""
def __init__(self,
is_training,
first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None,
weight_decay=0.0,
depth_multiplier=1.0,
min_depth=16):
"""Constructor.
Args:
is_training: See base class.
first_stage_features_stride: See base class.
batch_norm_trainable: See base class.
reuse_weights: See base class.
weight_decay: See base class.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
Raises:
ValueError: If `first_stage_features_stride` is not 8 or 16.
"""
if first_stage_features_stride != 8 and first_stage_features_stride != 16:
raise ValueError('`first_stage_features_stride` must be 8 or 16.')
self._depth_multiplier = depth_multiplier
self._min_depth = min_depth
super(FasterRCNNInceptionV2FeatureExtractor, self).__init__(
is_training, first_stage_features_stride, batch_norm_trainable,
reuse_weights, weight_decay)
def preprocess(self, resized_inputs):
"""Faster R-CNN Inception V2 preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_proposal_features(self, preprocessed_inputs, scope):
"""Extracts first stage RPN features.
Args:
preprocessed_inputs: A [batch, height, width, channels] float32 tensor
representing a batch of images.
scope: A scope name.
Returns:
rpn_feature_map: A tensor with shape [batch, height, width, depth]
Raises:
InvalidArgumentError: If the spatial size of `preprocessed_inputs`
(height or width) is less than 33.
ValueError: If the created network is missing the required activation.
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must be at least 33 in both height and width.'])
with tf.control_dependencies([shape_assert]):
with tf.variable_scope('InceptionV2',
reuse=self._reuse_weights) as scope:
with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
batch_norm_scale=True,
train_batch_norm=self._train_batch_norm):
_, activations = inception_v2.inception_v2_base(
preprocessed_inputs,
final_endpoint='Mixed_4e',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
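# 'Mixed_4e' has an effective stride of 16, so a 224x224 input yields a
# [batch, 14, 14, 576] map (see the accompanying test).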
return activations['Mixed_4e']
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
"""Extracts second stage box classifier features.
Args:
proposal_feature_maps: A 4-D float tensor with shape
[batch_size * self.max_num_proposals, crop_height, crop_width, depth]
representing the feature map cropped to each proposal.
scope: A scope name (unused).
Returns:
proposal_classifier_features: A 4-D float tensor with shape
[batch_size * self.max_num_proposals, height, width, depth]
representing box classifier features for each proposal.
"""
net = proposal_feature_maps
depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
data_format = 'NHWC'
concat_dim = 3 if data_format == 'NHWC' else 1
with tf.variable_scope('InceptionV2', reuse=self._reuse_weights):
with slim.arg_scope(
[slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1,
padding='SAME',
data_format=data_format):
with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
batch_norm_scale=True,
train_batch_norm=self._train_batch_norm):
with tf.variable_scope('Mixed_5a'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(
net, depth(128), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2,
scope='Conv2d_1a_3x3')
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(192), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],
scope='Conv2d_0b_3x3')
branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,
scope='Conv2d_1a_3x3')
with tf.variable_scope('Branch_2'):
branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
scope='MaxPool_1a_3x3')
net = tf.concat([branch_0, branch_1, branch_2], concat_dim)
with tf.variable_scope('Mixed_5b'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(352), [1, 1],
scope='Conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(192), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
scope='Conv2d_0b_3x3')
with tf.variable_scope('Branch_2'):
branch_2 = slim.conv2d(
net, depth(160), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
scope='Conv2d_0c_3x3')
with tf.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(128), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat([branch_0, branch_1, branch_2, branch_3],
concat_dim)
with tf.variable_scope('Mixed_5c'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(352), [1, 1],
scope='Conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(192), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
scope='Conv2d_0b_3x3')
with tf.variable_scope('Branch_2'):
branch_2 = slim.conv2d(
net, depth(192), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
scope='Conv2d_0c_3x3')
with tf.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(128), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
proposal_classifier_features = tf.concat(
[branch_0, branch_1, branch_2, branch_3], concat_dim)
return proposal_classifier_features
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for faster_rcnn_inception_v2_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as faster_rcnn_inception_v2
class FasterRcnnInceptionV2FeatureExtractorTest(tf.test.TestCase):
def _build_feature_extractor(self, first_stage_features_stride):
return faster_rcnn_inception_v2.FasterRCNNInceptionV2FeatureExtractor(
is_training=False,
first_stage_features_stride=first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None,
weight_decay=0.0)
def test_extract_proposal_features_returns_expected_size(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
preprocessed_inputs = tf.random_uniform(
[4, 224, 224, 3], maxval=255, dtype=tf.float32)
rpn_feature_map = feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
features_shape = tf.shape(rpn_feature_map)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [4, 14, 14, 576])
def test_extract_proposal_features_stride_eight(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=8)
preprocessed_inputs = tf.random_uniform(
[4, 224, 224, 3], maxval=255, dtype=tf.float32)
rpn_feature_map = feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
features_shape = tf.shape(rpn_feature_map)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [4, 14, 14, 576])
def test_extract_proposal_features_half_size_input(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
preprocessed_inputs = tf.random_uniform(
[1, 112, 112, 3], maxval=255, dtype=tf.float32)
rpn_feature_map = feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
features_shape = tf.shape(rpn_feature_map)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [1, 7, 7, 576])
def test_extract_proposal_features_dies_on_invalid_stride(self):
with self.assertRaises(ValueError):
self._build_feature_extractor(first_stage_features_stride=99)
def test_extract_proposal_features_dies_on_very_small_images(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
rpn_feature_map = feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
features_shape = tf.shape(rpn_feature_map)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
with self.assertRaises(tf.errors.InvalidArgumentError):
sess.run(
features_shape,
feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)})
def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
preprocessed_inputs = tf.random_uniform(
[224, 224, 3], maxval=255, dtype=tf.float32)
with self.assertRaises(ValueError):
feature_extractor.extract_proposal_features(
preprocessed_inputs, scope='TestScope')
def test_extract_box_classifier_features_returns_expected_size(self):
feature_extractor = self._build_feature_extractor(
first_stage_features_stride=16)
proposal_feature_maps = tf.random_uniform(
[3, 14, 14, 576], maxval=255, dtype=tf.float32)
proposal_classifier_features = (
feature_extractor.extract_box_classifier_features(
proposal_feature_maps, scope='TestScope'))
features_shape = tf.shape(proposal_classifier_features)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [3, 7, 7, 1024])
if __name__ == '__main__':
tf.test.main()
@@ -42,6 +42,7 @@ class FasterRCNNResnetV1FeatureExtractor(
resnet_model,
is_training,
first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None,
weight_decay=0.0):
"""Constructor.
@@ -51,6 +52,7 @@ class FasterRCNNResnetV1FeatureExtractor(
resnet_model: Definition of the Resnet V1 model.
is_training: See base class.
first_stage_features_stride: See base class.
batch_norm_trainable: See base class.
reuse_weights: See base class.
weight_decay: See base class.
@@ -62,7 +64,8 @@ class FasterRCNNResnetV1FeatureExtractor(
self._architecture = architecture
self._resnet_model = resnet_model
super(FasterRCNNResnetV1FeatureExtractor, self).__init__(
is_training, first_stage_features_stride, reuse_weights, weight_decay)
is_training, first_stage_features_stride, batch_norm_trainable,
reuse_weights, weight_decay)
def preprocess(self, resized_inputs):
"""Faster R-CNN Resnet V1 preprocessing.
@@ -119,7 +122,7 @@ class FasterRCNNResnetV1FeatureExtractor(
_, activations = self._resnet_model(
preprocessed_inputs,
num_classes=None,
is_training=False,
is_training=self._train_batch_norm,
global_pool=False,
output_stride=self._first_stage_features_stride,
spatial_squeeze=False,
@@ -148,7 +151,8 @@ class FasterRCNNResnetV1FeatureExtractor(
batch_norm_epsilon=1e-5,
batch_norm_scale=True,
weight_decay=self._weight_decay)):
with slim.arg_scope([slim.batch_norm], is_training=False):
with slim.arg_scope([slim.batch_norm],
is_training=self._train_batch_norm):
blocks = [
resnet_utils.Block('block4', resnet_v1.bottleneck, [{
'depth': 2048,
@@ -167,6 +171,7 @@ class FasterRCNNResnet50FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
def __init__(self,
is_training,
first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None,
weight_decay=0.0):
"""Constructor.
@@ -174,6 +179,7 @@ class FasterRCNNResnet50FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
Args:
is_training: See base class.
first_stage_features_stride: See base class.
batch_norm_trainable: See base class.
reuse_weights: See base class.
weight_decay: See base class.
@@ -183,7 +189,8 @@ class FasterRCNNResnet50FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
"""
super(FasterRCNNResnet50FeatureExtractor, self).__init__(
'resnet_v1_50', resnet_v1.resnet_v1_50, is_training,
first_stage_features_stride, reuse_weights, weight_decay)
first_stage_features_stride, batch_norm_trainable,
reuse_weights, weight_decay)
class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
@@ -192,6 +199,7 @@ class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
def __init__(self,
is_training,
first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None,
weight_decay=0.0):
"""Constructor.
@@ -199,6 +207,7 @@ class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
Args:
is_training: See base class.
first_stage_features_stride: See base class.
batch_norm_trainable: See base class.
reuse_weights: See base class.
weight_decay: See base class.
@@ -208,7 +217,8 @@ class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
"""
super(FasterRCNNResnet101FeatureExtractor, self).__init__(
'resnet_v1_101', resnet_v1.resnet_v1_101, is_training,
first_stage_features_stride, reuse_weights, weight_decay)
first_stage_features_stride, batch_norm_trainable,
reuse_weights, weight_decay)
class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
...@@ -217,6 +227,7 @@ class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor): ...@@ -217,6 +227,7 @@ class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
def __init__(self, def __init__(self,
is_training, is_training,
first_stage_features_stride, first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None, reuse_weights=None,
weight_decay=0.0): weight_decay=0.0):
"""Constructor. """Constructor.
...@@ -224,6 +235,7 @@ class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor): ...@@ -224,6 +235,7 @@ class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
    Args:
      is_training: See base class.
      first_stage_features_stride: See base class.
+      batch_norm_trainable: See base class.
      reuse_weights: See base class.
      weight_decay: See base class.
...
@@ -233,4 +245,5 @@ class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
""" """
super(FasterRCNNResnet152FeatureExtractor, self).__init__( super(FasterRCNNResnet152FeatureExtractor, self).__init__(
'resnet_v1_152', resnet_v1.resnet_v1_152, is_training, 'resnet_v1_152', resnet_v1.resnet_v1_152, is_training,
first_stage_features_stride, reuse_weights, weight_decay) first_stage_features_stride, batch_norm_trainable,
reuse_weights, weight_decay)
...
@@ -37,6 +37,7 @@ class FasterRcnnResnetV1FeatureExtractorTest(tf.test.TestCase):
    return feature_extractor_map[architecture](
        is_training=False,
        first_stage_features_stride=first_stage_features_stride,
+        batch_norm_trainable=False,
        reuse_weights=None,
        weight_decay=0.0)
...
@@ -25,7 +25,6 @@ of final feature maps.
""" """
import collections import collections
import tensorflow as tf import tensorflow as tf
from object_detection.utils import ops
slim = tf.contrib.slim slim = tf.contrib.slim
...
@@ -59,12 +58,13 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
  based on the spatial shape and depth configuration. Note that the current
  implementation only supports generating new layers using convolution of
  stride 2 resulting in a spatial resolution reduction by a factor of 2.
+  By default the convolution kernel size is set to 3, and it can be
+  customized by the caller.

  An example of the configuration for Inception V3:
  {
    'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
-    'layer_depth': [-1, -1, -1, 512, 256, 128],
-    'anchor_strides': [16, 32, 64, -1, -1, -1]
+    'layer_depth': [-1, -1, -1, 512, 256, 128]
  }
  Args:
...
@@ -72,14 +72,12 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
      layouts in the following format (Inception V2/V3 respectively):
      {
        'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
-        'layer_depth': [-1, -1, -1, 512, 256, 128],
-        'anchor_strides': [16, 32, 64, -1, -1, -1]
+        'layer_depth': [-1, -1, -1, 512, 256, 128]
      }
      or
      {
        'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', '', ''],
-        'layer_depth': [-1, -1, -1, 512, 256, 128],
-        'anchor_strides': [16, 32, 64, -1, -1, -1]
+        'layer_depth': [-1, -1, -1, 512, 256, 128]
      }
      If 'from_layer' is specified, the specified feature map is directly used
      as a box predictor layer, and the layer_depth is directly inferred from the
...
@@ -90,14 +88,11 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
      Note that the current implementation only supports generating new layers
      using convolutions of stride 2 (resulting in a spatial resolution
      reduction by a factor of 2), and will be extended to a more flexible
-      design. Finally, the optional 'anchor_strides' can be used to specify the
-      anchor stride at each layer where 'from_layer' is specified. Our
-      convention is to set 'anchor_strides' to -1 at the positions where
-      'from_layer' is an empty string; anchor strides at these layers will
-      be inferred from the previous layer's anchor strides and the current
-      layer's stride length. In the case where 'anchor_strides' is not
-      specified, the anchor strides will default to the image width and height
-      divided by the number of anchors.
+      design. The convolution kernel size is set to 3 by default, and can be
+      customized via the 'conv_kernel_size' parameter (similarly,
+      'conv_kernel_size' should be set to -1 at positions where 'from_layer'
+      is specified). The created convolution operation will be a normal 2D
+      convolution by default, and a depthwise convolution followed by a 1x1
+      convolution if 'use_depthwise' is set to True.
    depth_multiplier: Depth multiplier for convolutional layers.
    min_depth: Minimum depth for convolutional layers.
    insert_1x1_conv: A boolean indicating whether an additional 1x1 convolution
...
@@ -120,14 +115,14 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
  feature_map_keys = []
  feature_maps = []
  base_from_layer = ''
-  feature_map_strides = None
  use_depthwise = False
-  if 'anchor_strides' in feature_map_layout:
-    feature_map_strides = (feature_map_layout['anchor_strides'])
  if 'use_depthwise' in feature_map_layout:
    use_depthwise = feature_map_layout['use_depthwise']
-  for index, (from_layer, layer_depth) in enumerate(
-      zip(feature_map_layout['from_layer'], feature_map_layout['layer_depth'])):
+  for index, from_layer in enumerate(feature_map_layout['from_layer']):
+    layer_depth = feature_map_layout['layer_depth'][index]
+    conv_kernel_size = 3
+    if 'conv_kernel_size' in feature_map_layout:
+      conv_kernel_size = feature_map_layout['conv_kernel_size'][index]
    if from_layer:
      feature_map = image_features[from_layer]
      base_from_layer = from_layer
...
@@ -145,12 +140,13 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
          stride=1,
          scope=layer_name)
      stride = 2
-      layer_name = '{}_2_Conv2d_{}_3x3_s2_{}'.format(
-          base_from_layer, index, depth_fn(layer_depth))
+      layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
+          base_from_layer, index, conv_kernel_size, conv_kernel_size,
+          depth_fn(layer_depth))
      if use_depthwise:
        feature_map = slim.separable_conv2d(
-            ops.pad_to_multiple(intermediate_layer, stride),
-            None, [3, 3],
+            intermediate_layer,
+            None, [conv_kernel_size, conv_kernel_size],
            depth_multiplier=1,
            padding='SAME',
            stride=stride,
...
@@ -163,16 +159,11 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
            scope=layer_name)
      else:
        feature_map = slim.conv2d(
-            ops.pad_to_multiple(intermediate_layer, stride),
-            depth_fn(layer_depth), [3, 3],
+            intermediate_layer,
+            depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size],
            padding='SAME',
            stride=stride,
            scope=layer_name)
-      if (index > 0 and feature_map_strides and
-          feature_map_strides[index - 1] > 0):
-        feature_map_strides[index] = (
-            stride * feature_map_strides[index - 1])
      feature_map_keys.append(layer_name)
      feature_maps.append(feature_map)
  return collections.OrderedDict(
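To illustrate the new 'conv_kernel_size' handling, a minimal sketch, assuming a TF 1.x environment with the object_detection package importable (the layout below is hypothetical):

import tensorflow as tf
from object_detection.models import feature_map_generators

# -1 marks positions whose maps come straight from 'from_layer'; the last
# generated layer uses a 2x2 kernel, so its scope name ends in '_2x2_s2_256'.
layout = {
    'from_layer': ['Conv2d_13_pointwise', '', ''],
    'layer_depth': [-1, 512, 256],
    'conv_kernel_size': [-1, 3, 2],
}
image_features = {
    'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32),
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
    feature_map_layout=layout,
    depth_multiplier=1,
    min_depth=32,
    insert_1x1_conv=True,
    image_features=image_features)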
...
@@ -33,8 +33,14 @@ INCEPTION_V3_LAYOUT = {
    'aspect_ratios': [1.0, 2.0, 1.0/2, 3.0, 1.0/3]
}
+EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
+    'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''],
+    'layer_depth': [-1, -1, 512, 256, 256],
+    'conv_kernel_size': [-1, -1, 3, 3, 2],
+}
-# TODO: add tests with different anchor strides.
+# TODO(rathodv): add tests with different anchor strides.
class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
  def test_get_expected_feature_map_shapes_with_inception_v2(self):
...
@@ -96,6 +102,37 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
        (key, value.shape) for key, value in out_feature_maps.items())
    self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
+  def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
+      self):
+    image_features = {
+        'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512],
+                                                 dtype=tf.float32),
+        'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024],
+                                                 dtype=tf.float32),
+    }
+    feature_maps = feature_map_generators.multi_resolution_feature_maps(
+        feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
+        depth_multiplier=1,
+        min_depth=32,
+        insert_1x1_conv=True,
+        image_features=image_features)
+    expected_feature_map_shapes = {
+        'Conv2d_11_pointwise': (4, 16, 16, 512),
+        'Conv2d_13_pointwise': (4, 8, 8, 1024),
+        'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
+        'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
+        'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256)}
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      out_feature_maps = sess.run(feature_maps)
+      out_feature_map_shapes = dict(
+          (key, value.shape) for key, value in out_feature_maps.items())
+    self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
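Worked shape check for the test above: with 'SAME' padding and stride 2, each generated layer halves the spatial size by ceiling division, so the 8x8 'Conv2d_13_pointwise' map yields 4x4, 2x2, and finally 1x1 feature maps. The final entry's 2x2 kernel (from 'conv_kernel_size') exactly covers its 2x2 input, which is presumably why the embedded layout overrides the default 3x3 kernel there.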
class GetDepthFunctionTest(tf.test.TestCase):
...
@@ -46,34 +46,32 @@ class SsdFeatureExtractorTestBase(object):
    self.assertAllEqual(shape_out, exp_shape_out)
  @abstractmethod
-  def _create_feature_extractor(self, depth_multiplier):
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple):
    """Constructs a new feature extractor.

    Args:
      depth_multiplier: float depth multiplier for feature extractor
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.

    Returns:
      an ssd_meta_arch.SSDFeatureExtractor object.
    """
    pass
  def check_extract_features_returns_correct_shape(
-      self,
-      image_height,
-      image_width,
-      depth_multiplier,
+      self, image_height, image_width, depth_multiplier, pad_to_multiple,
      expected_feature_map_shapes_out):
-    feature_extractor = self._create_feature_extractor(depth_multiplier)
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
    preprocessed_inputs = tf.random_uniform(
        [4, image_height, image_width, 3], dtype=tf.float32)
    self._validate_features_shape(
        feature_extractor, preprocessed_inputs, expected_feature_map_shapes_out)
  def check_extract_features_raises_error_with_invalid_image_size(
-      self,
-      image_height,
-      image_width,
-      depth_multiplier):
-    feature_extractor = self._create_feature_extractor(depth_multiplier)
+      self, image_height, image_width, depth_multiplier, pad_to_multiple):
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
    preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
    feature_maps = feature_extractor.extract_features(preprocessed_inputs)
    test_preprocessed_image = np.random.rand(4, image_height, image_width, 3)
...
@@ -83,12 +81,12 @@ class SsdFeatureExtractorTestBase(object):
      sess.run(feature_maps,
               feed_dict={preprocessed_inputs: test_preprocessed_image})
-  def check_feature_extractor_variables_under_scope(self,
-                                                    depth_multiplier,
-                                                    scope_name):
+  def check_feature_extractor_variables_under_scope(
+      self, depth_multiplier, pad_to_multiple, scope_name):
    g = tf.Graph()
    with g.as_default():
-      feature_extractor = self._create_feature_extractor(depth_multiplier)
+      feature_extractor = self._create_feature_extractor(
+          depth_multiplier, pad_to_multiple)
      preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
      feature_extractor.extract_features(preprocessed_inputs)
      variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
...
@@ -18,6 +18,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
+from object_detection.utils import ops
from nets import inception_v2

slim = tf.contrib.slim
...
@@ -27,20 +28,31 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using InceptionV2 features.""" """SSD Feature Extractor using InceptionV2 features."""
  def __init__(self,
+               is_training,
               depth_multiplier,
               min_depth,
+               pad_to_multiple,
               conv_hyperparams,
+               batch_norm_trainable=True,
               reuse_weights=None):
    """InceptionV2 Feature Extractor for SSD Models.

    Args:
+      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
+      batch_norm_trainable: Whether to update batch norm parameters during
+        training or not. When training with a small batch size (e.g. 1), it
+        is desirable to disable batch norm update and use pretrained batch
+        norm params.
      reuse_weights: Whether to reuse variables. Default is None.
    """
    super(SSDInceptionV2FeatureExtractor, self).__init__(
-        depth_multiplier, min_depth, conv_hyperparams, reuse_weights)
+        is_training, depth_multiplier, min_depth, pad_to_multiple,
+        conv_hyperparams, batch_norm_trainable, reuse_weights)
  def preprocess(self, resized_inputs):
    """SSD preprocessing.
...
@@ -84,7 +96,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
        with tf.variable_scope('InceptionV2',
                               reuse=self._reuse_weights) as scope:
          _, image_features = inception_v2.inception_v2_base(
-              preprocessed_inputs,
+              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Mixed_5c',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
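A minimal usage sketch of the updated InceptionV2 constructor (an empty conv_hyperparams dict is used here, as in the unit tests; real pipelines build it from the hyperparams config):

from object_detection.models import ssd_inception_v2_feature_extractor

# Hypothetical instantiation: freeze batch norm statistics for small-batch
# fine-tuning and zero pad inputs up to the nearest multiple of 32.
feature_extractor = (
    ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
        is_training=True,
        depth_multiplier=1.0,
        min_depth=32,
        pad_to_multiple=32,
        conv_hyperparams={},
        batch_norm_trainable=False))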
...
@@ -22,73 +22,101 @@ from object_detection.models import ssd_inception_v2_feature_extractor
class SsdInceptionV2FeatureExtractorTest(
-    ssd_feature_extractor_test.SsdFeatureExtractorTestBase,
-    tf.test.TestCase):
+    ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):

-  def _create_feature_extractor(self, depth_multiplier):
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+                                is_training=True, batch_norm_trainable=True):
    """Constructs a SsdInceptionV2FeatureExtractor.

    Args:
      depth_multiplier: float depth multiplier for feature extractor
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      is_training: whether the network is in training mode.
+      batch_norm_trainable: Whether to update batch norm parameters during
+        training or not

    Returns:
      an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor.
    """
    min_depth = 32
    conv_hyperparams = {}
    return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
-        depth_multiplier, min_depth, conv_hyperparams)
+        is_training, depth_multiplier, min_depth, pad_to_multiple,
+        conv_hyperparams, batch_norm_trainable)
  def test_extract_features_returns_correct_shapes_128(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1.0
+    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 8, 8, 576), (4, 4, 4, 1024),
                                  (4, 2, 2, 512), (4, 1, 1, 256),
                                  (4, 1, 1, 256), (4, 1, 1, 128)]
    self.check_extract_features_returns_correct_shape(
-        image_height, image_width, depth_multiplier, expected_feature_map_shape)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)
  def test_extract_features_returns_correct_shapes_299(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 1.0
+    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 19, 19, 576), (4, 10, 10, 1024),
                                  (4, 5, 5, 512), (4, 3, 3, 256),
                                  (4, 2, 2, 256), (4, 1, 1, 128)]
    self.check_extract_features_returns_correct_shape(
-        image_height, image_width, depth_multiplier, expected_feature_map_shape)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)
  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 0.5**12
+    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 19, 19, 128), (4, 10, 10, 128),
                                  (4, 5, 5, 32), (4, 3, 3, 32),
                                  (4, 2, 2, 32), (4, 1, 1, 32)]
    self.check_extract_features_returns_correct_shape(
-        image_height, image_width, depth_multiplier, expected_feature_map_shape)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)
+  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
+    image_height = 299
+    image_width = 299
+    depth_multiplier = 1.0
+    pad_to_multiple = 32
+    expected_feature_map_shape = [(4, 20, 20, 576), (4, 10, 10, 1024),
+                                  (4, 5, 5, 512), (4, 3, 3, 256),
+                                  (4, 2, 2, 256), (4, 1, 1, 128)]
+    self.check_extract_features_returns_correct_shape(
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape)
  def test_extract_features_raises_error_with_invalid_image_size(self):
    image_height = 32
    image_width = 32
    depth_multiplier = 1.0
+    pad_to_multiple = 1
    self.check_extract_features_raises_error_with_invalid_image_size(
-        image_height, image_width, depth_multiplier)
+        image_height, image_width, depth_multiplier, pad_to_multiple)
  def test_preprocess_returns_correct_value_range(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1
+    pad_to_multiple = 1
    test_image = np.random.rand(4, image_height, image_width, 3)
-    feature_extractor = self._create_feature_extractor(depth_multiplier)
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
    preprocessed_image = feature_extractor.preprocess(test_image)
    self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
  def test_variables_only_created_in_scope(self):
    depth_multiplier = 1
+    pad_to_multiple = 1
    scope_name = 'InceptionV2'
-    self.check_feature_extractor_variables_under_scope(depth_multiplier,
-                                                       scope_name)
+    self.check_feature_extractor_variables_under_scope(
+        depth_multiplier, pad_to_multiple, scope_name)
if __name__ == '__main__':
...
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for InceptionV3 features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import ops
from nets import inception_v3
slim = tf.contrib.slim
class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using InceptionV3 features."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None):
"""InceptionV3 Feature Extractor for SSD Models.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
"""
super(SSDInceptionV3FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
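  # Worked check of the mapping above: (2 / 255) * x - 1 sends pixel value 0
  # to -1, 127.5 to 0, and 255 to 1, so outputs always lie in [-1, 1].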
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    feature_map_layout = {
        'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
        'layer_depth': [-1, -1, -1, 512, 256, 128],
    }

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(self._conv_hyperparams):
        with tf.variable_scope('InceptionV3',
                               reuse=self._reuse_weights) as scope:
          _, image_features = inception_v3.inception_v3_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Mixed_7c',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)
    return feature_maps.values()
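The pad_to_multiple behaviour wired in above is easy to sanity-check; a short sketch (assuming the ops module from this repository):

import tensorflow as tf
from object_detection.utils import ops

# ceil(299 / 32) * 32 = 320, so a 299x299 batch is zero padded to 320x320,
# which matches the pad_to_multiple test case below.
images = tf.random_uniform([4, 299, 299, 3], dtype=tf.float32)
padded = ops.pad_to_multiple(images, 32)
print(padded.get_shape())  # (4, 320, 320, 3)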
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.models.ssd_inception_v3_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_inception_v3_feature_extractor
class SsdInceptionV3FeatureExtractorTest(
    ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):

  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                is_training=True, batch_norm_trainable=True):
    """Constructs a SsdInceptionV3FeatureExtractor.

    Args:
      depth_multiplier: float depth multiplier for feature extractor
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      is_training: whether the network is in training mode.
      batch_norm_trainable: Whether to update batch norm parameters during
        training or not

    Returns:
      an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor.
    """
    min_depth = 32
    conv_hyperparams = {}
    return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, batch_norm_trainable)

  def test_extract_features_returns_correct_shapes_128(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 13, 13, 288), (4, 6, 6, 768),
                                  (4, 2, 2, 2048), (4, 1, 1, 512),
                                  (4, 1, 1, 256), (4, 1, 1, 128)]
    self.check_extract_features_returns_correct_shape(
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_299(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 35, 35, 288), (4, 17, 17, 768),
                                  (4, 8, 8, 2048), (4, 4, 4, 512),
                                  (4, 2, 2, 256), (4, 1, 1, 128)]
    self.check_extract_features_returns_correct_shape(
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 0.5**12
    pad_to_multiple = 1
    expected_feature_map_shape = [(4, 35, 35, 128), (4, 17, 17, 128),
                                  (4, 8, 8, 192), (4, 4, 4, 32),
                                  (4, 2, 2, 32), (4, 1, 1, 32)]
    self.check_extract_features_returns_correct_shape(
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 1.0
    pad_to_multiple = 32
    expected_feature_map_shape = [(4, 37, 37, 288), (4, 18, 18, 768),
                                  (4, 8, 8, 2048), (4, 4, 4, 512),
                                  (4, 2, 2, 256), (4, 1, 1, 128)]
    self.check_extract_features_returns_correct_shape(
        image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_raises_error_with_invalid_image_size(self):
    image_height = 32
    image_width = 32
    depth_multiplier = 1.0
    pad_to_multiple = 1
    self.check_extract_features_raises_error_with_invalid_image_size(
        image_height, image_width, depth_multiplier, pad_to_multiple)

  def test_preprocess_returns_correct_value_range(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1
    pad_to_multiple = 1
    test_image = np.random.rand(4, image_height, image_width, 3)
    feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                       pad_to_multiple)
    preprocessed_image = feature_extractor.preprocess(test_image)
    self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))

  def test_variables_only_created_in_scope(self):
    depth_multiplier = 1
    pad_to_multiple = 1
    scope_name = 'InceptionV3'
    self.check_feature_extractor_variables_under_scope(
        depth_multiplier, pad_to_multiple, scope_name)


if __name__ == '__main__':
  tf.test.main()
...
@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
+from object_detection.utils import ops
from nets import mobilenet_v1

slim = tf.contrib.slim
...
@@ -28,20 +29,31 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using MobilenetV1 features.""" """SSD Feature Extractor using MobilenetV1 features."""
  def __init__(self,
+               is_training,
               depth_multiplier,
               min_depth,
+               pad_to_multiple,
               conv_hyperparams,
+               batch_norm_trainable=True,
               reuse_weights=None):
    """MobileNetV1 Feature Extractor for SSD Models.

    Args:
+      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
+      batch_norm_trainable: Whether to update batch norm parameters during
+        training or not. When training with a small batch size (e.g. 1), it
+        is desirable to disable batch norm update and use pretrained batch
+        norm params.
      reuse_weights: Whether to reuse variables. Default is None.
    """
    super(SSDMobileNetV1FeatureExtractor, self).__init__(
-        depth_multiplier, min_depth, conv_hyperparams, reuse_weights)
+        is_training, depth_multiplier, min_depth, pad_to_multiple,
+        conv_hyperparams, batch_norm_trainable, reuse_weights)
  def preprocess(self, resized_inputs):
    """SSD preprocessing.
...
@@ -83,19 +95,20 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(self._conv_hyperparams):
-        with tf.variable_scope('MobilenetV1',
-                               reuse=self._reuse_weights) as scope:
-          _, image_features = mobilenet_v1.mobilenet_v1_base(
-              preprocessed_inputs,
-              final_endpoint='Conv2d_13_pointwise',
-              min_depth=self._min_depth,
-              depth_multiplier=self._depth_multiplier,
-              scope=scope)
-          feature_maps = feature_map_generators.multi_resolution_feature_maps(
-              feature_map_layout=feature_map_layout,
-              depth_multiplier=self._depth_multiplier,
-              min_depth=self._min_depth,
-              insert_1x1_conv=True,
-              image_features=image_features)
+        with slim.arg_scope([slim.batch_norm], fused=False):
+          with tf.variable_scope('MobilenetV1',
+                                 reuse=self._reuse_weights) as scope:
+            _, image_features = mobilenet_v1.mobilenet_v1_base(
+                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
+                final_endpoint='Conv2d_13_pointwise',
+                min_depth=self._min_depth,
+                depth_multiplier=self._depth_multiplier,
+                scope=scope)
+            feature_maps = feature_map_generators.multi_resolution_feature_maps(
+                feature_map_layout=feature_map_layout,
+                depth_multiplier=self._depth_multiplier,
+                min_depth=self._min_depth,
+                insert_1x1_conv=True,
+                image_features=image_features)
    return feature_maps.values()
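An end-to-end sketch of the updated MobileNet extractor (an empty conv_hyperparams dict, as in the unit tests; this only builds the graph):

import tensorflow as tf
from object_detection.models import ssd_mobilenet_v1_feature_extractor

extractor = ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
    is_training=False,
    depth_multiplier=1.0,
    min_depth=32,
    pad_to_multiple=1,
    conv_hyperparams={},
    batch_norm_trainable=False)
images = tf.random_uniform([1, 300, 300, 3], dtype=tf.float32)
feature_maps = extractor.extract_features(extractor.preprocess(images))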