Commit f282f6ef authored by Alexander Gorban's avatar Alexander Gorban
Browse files

Merge branch 'master' of github.com:tensorflow/models

parents 58a5da7b a2970b03
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for matcher_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import matcher_builder
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
from object_detection.protos import matcher_pb2
class MatcherBuilderTest(tf.test.TestCase):
def test_build_arg_max_matcher_with_defaults(self):
matcher_text_proto = """
argmax_matcher {
}
"""
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
self.assertAlmostEqual(matcher_object._matched_threshold, 0.5)
self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.5)
self.assertTrue(matcher_object._negatives_lower_than_unmatched)
self.assertFalse(matcher_object._force_match_for_each_row)
def test_build_arg_max_matcher_without_thresholds(self):
matcher_text_proto = """
argmax_matcher {
ignore_thresholds: true
}
"""
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
self.assertEqual(matcher_object._matched_threshold, None)
self.assertEqual(matcher_object._unmatched_threshold, None)
self.assertTrue(matcher_object._negatives_lower_than_unmatched)
self.assertFalse(matcher_object._force_match_for_each_row)
def test_build_arg_max_matcher_with_non_default_parameters(self):
matcher_text_proto = """
argmax_matcher {
matched_threshold: 0.7
unmatched_threshold: 0.3
negatives_lower_than_unmatched: false
force_match_for_each_row: true
}
"""
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
self.assertAlmostEqual(matcher_object._matched_threshold, 0.7)
self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.3)
self.assertFalse(matcher_object._negatives_lower_than_unmatched)
self.assertTrue(matcher_object._force_match_for_each_row)
def test_build_bipartite_matcher(self):
matcher_text_proto = """
bipartite_matcher {
}
"""
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
self.assertTrue(
isinstance(matcher_object, bipartite_matcher.GreedyBipartiteMatcher))
def test_raise_error_on_empty_matcher(self):
matcher_text_proto = """
"""
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
with self.assertRaises(ValueError):
matcher_builder.build(matcher_proto)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A function to build a DetectionModel from configuration."""
from object_detection.builders import anchor_generator_builder
from object_detection.builders import box_coder_builder
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.builders import image_resizer_builder
from object_detection.builders import losses_builder
from object_detection.builders import matcher_builder
from object_detection.builders import post_processing_builder
from object_detection.builders import region_similarity_calculator_builder as sim_calc
from object_detection.core import box_predictor
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.meta_architectures import rfcn_meta_arch
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
from object_detection.protos import model_pb2
# A map of names to SSD feature extractors.
SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
'ssd_inception_v2': SSDInceptionV2FeatureExtractor,
'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor,
}
# A map of names to Faster R-CNN feature extractors.
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
'faster_rcnn_resnet50':
frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
'faster_rcnn_resnet101':
frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
'faster_rcnn_resnet152':
frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
'faster_rcnn_inception_resnet_v2':
frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor
}
def build(model_config, is_training):
"""Builds a DetectionModel based on the model config.
Args:
model_config: A model.proto object containing the config for the desired
DetectionModel.
is_training: True if this model is being built for training purposes.
Returns:
DetectionModel based on the config.
Raises:
ValueError: On invalid meta architecture or model.
"""
if not isinstance(model_config, model_pb2.DetectionModel):
raise ValueError('model_config not of type model_pb2.DetectionModel.')
meta_architecture = model_config.WhichOneof('model')
if meta_architecture == 'ssd':
return _build_ssd_model(model_config.ssd, is_training)
if meta_architecture == 'faster_rcnn':
return _build_faster_rcnn_model(model_config.faster_rcnn, is_training)
raise ValueError('Unknown meta architecture: {}'.format(meta_architecture))
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
reuse_weights=None):
"""Builds a ssd_meta_arch.SSDFeatureExtractor based on config.
Args:
feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
Returns:
ssd_meta_arch.SSDFeatureExtractor based on config.
Raises:
ValueError: On invalid feature extractor type.
"""
feature_type = feature_extractor_config.type
depth_multiplier = feature_extractor_config.depth_multiplier
min_depth = feature_extractor_config.min_depth
conv_hyperparams = hyperparams_builder.build(
feature_extractor_config.conv_hyperparams, is_training)
if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
return feature_extractor_class(depth_multiplier, min_depth, conv_hyperparams,
reuse_weights)
def _build_ssd_model(ssd_config, is_training):
"""Builds an SSD detection model based on the model config.
Args:
ssd_config: A ssd.proto object containing the config for the desired
SSDMetaArch.
is_training: True if this model is being built for training purposes.
Returns:
SSDMetaArch based on the config.
Raises:
ValueError: If ssd_config.type is not recognized (i.e. not registered in
model_class_map).
"""
num_classes = ssd_config.num_classes
# Feature extractor
feature_extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
is_training)
box_coder = box_coder_builder.build(ssd_config.box_coder)
matcher = matcher_builder.build(ssd_config.matcher)
region_similarity_calculator = sim_calc.build(
ssd_config.similarity_calculator)
ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
ssd_config.box_predictor,
is_training, num_classes)
anchor_generator = anchor_generator_builder.build(
ssd_config.anchor_generator)
image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
ssd_config.post_processing)
(classification_loss, localization_loss, classification_weight,
localization_weight,
hard_example_miner) = losses_builder.build(ssd_config.loss)
normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
return ssd_meta_arch.SSDMetaArch(
is_training,
anchor_generator,
ssd_box_predictor,
box_coder,
feature_extractor,
matcher,
region_similarity_calculator,
image_resizer_fn,
non_max_suppression_fn,
score_conversion_fn,
classification_loss,
localization_loss,
classification_weight,
localization_weight,
normalize_loss_by_num_matches,
hard_example_miner)
def _build_faster_rcnn_feature_extractor(
feature_extractor_config, is_training, reuse_weights=None):
"""Builds a faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
Args:
feature_extractor_config: A FasterRcnnFeatureExtractor proto config from
faster_rcnn.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
Returns:
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
Raises:
ValueError: On invalid feature extractor type.
"""
feature_type = feature_extractor_config.type
first_stage_features_stride = (
feature_extractor_config.first_stage_features_stride)
if feature_type not in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP:
raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format(
feature_type))
feature_extractor_class = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[
feature_type]
return feature_extractor_class(
is_training, first_stage_features_stride, reuse_weights)
def _build_faster_rcnn_model(frcnn_config, is_training):
"""Builds a Faster R-CNN or R-FCN detection model based on the model config.
Builds R-FCN model if the second_stage_box_predictor in the config is of type
`rfcn_box_predictor` else builds a Faster R-CNN model.
Args:
frcnn_config: A faster_rcnn.proto object containing the config for the
desired FasterRCNNMetaArch or RFCNMetaArch.
is_training: True if this model is being built for training purposes.
Returns:
FasterRCNNMetaArch based on the config.
Raises:
ValueError: If frcnn_config.type is not recognized (i.e. not registered in
model_class_map).
"""
num_classes = frcnn_config.num_classes
image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
feature_extractor = _build_faster_rcnn_feature_extractor(
frcnn_config.feature_extractor, is_training)
first_stage_only = frcnn_config.first_stage_only
first_stage_anchor_generator = anchor_generator_builder.build(
frcnn_config.first_stage_anchor_generator)
first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
first_stage_box_predictor_arg_scope = hyperparams_builder.build(
frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
first_stage_box_predictor_kernel_size = (
frcnn_config.first_stage_box_predictor_kernel_size)
first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
first_stage_positive_balance_fraction = (
frcnn_config.first_stage_positive_balance_fraction)
first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
first_stage_max_proposals = frcnn_config.first_stage_max_proposals
first_stage_loc_loss_weight = (
frcnn_config.first_stage_localization_loss_weight)
first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight
initial_crop_size = frcnn_config.initial_crop_size
maxpool_kernel_size = frcnn_config.maxpool_kernel_size
maxpool_stride = frcnn_config.maxpool_stride
second_stage_box_predictor = box_predictor_builder.build(
hyperparams_builder.build,
frcnn_config.second_stage_box_predictor,
is_training=is_training,
num_classes=num_classes)
second_stage_batch_size = frcnn_config.second_stage_batch_size
second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
(second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
second_stage_localization_loss_weight = (
frcnn_config.second_stage_localization_loss_weight)
second_stage_classification_loss_weight = (
frcnn_config.second_stage_classification_loss_weight)
hard_example_miner = None
if frcnn_config.HasField('hard_example_miner'):
hard_example_miner = losses_builder.build_hard_example_miner(
frcnn_config.hard_example_miner,
second_stage_classification_loss_weight,
second_stage_localization_loss_weight)
common_kwargs = {
'is_training': is_training,
'num_classes': num_classes,
'image_resizer_fn': image_resizer_fn,
'feature_extractor': feature_extractor,
'first_stage_only': first_stage_only,
'first_stage_anchor_generator': first_stage_anchor_generator,
'first_stage_atrous_rate': first_stage_atrous_rate,
'first_stage_box_predictor_arg_scope':
first_stage_box_predictor_arg_scope,
'first_stage_box_predictor_kernel_size':
first_stage_box_predictor_kernel_size,
'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
'first_stage_minibatch_size': first_stage_minibatch_size,
'first_stage_positive_balance_fraction':
first_stage_positive_balance_fraction,
'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
'first_stage_max_proposals': first_stage_max_proposals,
'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
'second_stage_batch_size': second_stage_batch_size,
'second_stage_balance_fraction': second_stage_balance_fraction,
'second_stage_non_max_suppression_fn':
second_stage_non_max_suppression_fn,
'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
'second_stage_localization_loss_weight':
second_stage_localization_loss_weight,
'second_stage_classification_loss_weight':
second_stage_classification_loss_weight,
'hard_example_miner': hard_example_miner}
if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
return rfcn_meta_arch.RFCNMetaArch(
second_stage_rfcn_box_predictor=second_stage_box_predictor,
**common_kwargs)
else:
return faster_rcnn_meta_arch.FasterRCNNMetaArch(
initial_crop_size=initial_crop_size,
maxpool_kernel_size=maxpool_kernel_size,
maxpool_stride=maxpool_stride,
second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
**common_kwargs)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.models.model_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import model_builder
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.meta_architectures import rfcn_meta_arch
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
from object_detection.protos import model_pb2
FEATURE_EXTRACTOR_MAPS = {
'faster_rcnn_resnet50':
frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
'faster_rcnn_resnet101':
frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
'faster_rcnn_resnet152':
frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor
}
class ModelBuilderTest(tf.test.TestCase):
def create_model(self, model_config):
"""Builds a DetectionModel based on the model config.
Args:
model_config: A model.proto object containing the config for the desired
DetectionModel.
Returns:
DetectionModel based on the config.
"""
return model_builder.build(model_config, is_training=True)
def test_create_ssd_inception_v2_model_from_config(self):
model_text_proto = """
ssd {
feature_extractor {
type: 'ssd_inception_v2'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = self.create_model(model_proto)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDInceptionV2FeatureExtractor)
def test_create_ssd_mobilenet_v1_model_from_config(self):
model_text_proto = """
ssd {
feature_extractor {
type: 'ssd_mobilenet_v1'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = self.create_model(model_proto)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDMobileNetV1FeatureExtractor)
def test_create_faster_rcnn_resnet_v1_models_from_config(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items():
model_proto.faster_rcnn.feature_extractor.type = extractor_type
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
self.assertIsInstance(model._feature_extractor, extractor_class)
def test_create_faster_rcnn_inception_resnet_v2_model_from_config(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_inception_resnet_v2'
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
initial_crop_size: 17
maxpool_kernel_size: 1
maxpool_stride: 1
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
self.assertIsInstance(
model._feature_extractor,
frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor)
def test_create_faster_rcnn_model_from_config_with_example_miner(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
feature_extractor {
type: 'faster_rcnn_inception_resnet_v2'
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
hard_example_miner {
num_hard_examples: 10
iou_threshold: 0.99
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = model_builder.build(model_proto, is_training=True)
self.assertIsNotNone(model._hard_example_miner)
def test_create_rfcn_resnet_v1_model_from_config(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
rfcn_box_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items():
model_proto.faster_rcnn.feature_extractor.type = extractor_type
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, rfcn_meta_arch.RFCNMetaArch)
self.assertIsInstance(model._feature_extractor, extractor_class)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to build DetectionModel training optimizers."""
import tensorflow as tf
from object_detection.utils import learning_schedules
slim = tf.contrib.slim
def build(optimizer_config, global_summaries):
"""Create optimizer based on config.
Args:
optimizer_config: A Optimizer proto message.
global_summaries: A set to attach learning rate summary to.
Returns:
An optimizer.
Raises:
ValueError: when using an unsupported input data type.
"""
optimizer_type = optimizer_config.WhichOneof('optimizer')
optimizer = None
if optimizer_type == 'rms_prop_optimizer':
config = optimizer_config.rms_prop_optimizer
optimizer = tf.train.RMSPropOptimizer(
_create_learning_rate(config.learning_rate, global_summaries),
decay=config.decay,
momentum=config.momentum_optimizer_value,
epsilon=config.epsilon)
if optimizer_type == 'momentum_optimizer':
config = optimizer_config.momentum_optimizer
optimizer = tf.train.MomentumOptimizer(
_create_learning_rate(config.learning_rate, global_summaries),
momentum=config.momentum_optimizer_value)
if optimizer_type == 'adam_optimizer':
config = optimizer_config.adam_optimizer
optimizer = tf.train.AdamOptimizer(
_create_learning_rate(config.learning_rate, global_summaries))
if optimizer is None:
raise ValueError('Optimizer %s not supported.' % optimizer_type)
if optimizer_config.use_moving_average:
optimizer = tf.contrib.opt.MovingAverageOptimizer(
optimizer, average_decay=optimizer_config.moving_average_decay)
return optimizer
def _create_learning_rate(learning_rate_config, global_summaries):
"""Create optimizer learning rate based on config.
Args:
learning_rate_config: A LearningRate proto message.
global_summaries: A set to attach learning rate summary to.
Returns:
A learning rate.
Raises:
ValueError: when using an unsupported input data type.
"""
learning_rate = None
learning_rate_type = learning_rate_config.WhichOneof('learning_rate')
if learning_rate_type == 'constant_learning_rate':
config = learning_rate_config.constant_learning_rate
learning_rate = config.learning_rate
if learning_rate_type == 'exponential_decay_learning_rate':
config = learning_rate_config.exponential_decay_learning_rate
learning_rate = tf.train.exponential_decay(
config.initial_learning_rate,
slim.get_or_create_global_step(),
config.decay_steps,
config.decay_factor,
staircase=config.staircase)
if learning_rate_type == 'manual_step_learning_rate':
config = learning_rate_config.manual_step_learning_rate
if not config.schedule:
raise ValueError('Empty learning rate schedule.')
learning_rate_step_boundaries = [x.step for x in config.schedule]
learning_rate_sequence = [config.initial_learning_rate]
learning_rate_sequence += [x.learning_rate for x in config.schedule]
learning_rate = learning_schedules.manual_stepping(
slim.get_or_create_global_step(), learning_rate_step_boundaries,
learning_rate_sequence)
if learning_rate is None:
raise ValueError('Learning_rate %s not supported.' % learning_rate_type)
global_summaries.add(tf.summary.scalar('Learning Rate', learning_rate))
return learning_rate
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for optimizer_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import optimizer_builder
from object_detection.protos import optimizer_pb2
class LearningRateBuilderTest(tf.test.TestCase):
def testBuildConstantLearningRate(self):
learning_rate_text_proto = """
constant_learning_rate {
learning_rate: 0.004
}
"""
global_summaries = set([])
learning_rate_proto = optimizer_pb2.LearningRate()
text_format.Merge(learning_rate_text_proto, learning_rate_proto)
learning_rate = optimizer_builder._create_learning_rate(
learning_rate_proto, global_summaries)
self.assertAlmostEqual(learning_rate, 0.004)
def testBuildExponentialDecayLearningRate(self):
learning_rate_text_proto = """
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 99999
decay_factor: 0.85
staircase: false
}
"""
global_summaries = set([])
learning_rate_proto = optimizer_pb2.LearningRate()
text_format.Merge(learning_rate_text_proto, learning_rate_proto)
learning_rate = optimizer_builder._create_learning_rate(
learning_rate_proto, global_summaries)
self.assertTrue(isinstance(learning_rate, tf.Tensor))
def testBuildManualStepLearningRate(self):
learning_rate_text_proto = """
manual_step_learning_rate {
schedule {
step: 0
learning_rate: 0.006
}
schedule {
step: 90000
learning_rate: 0.00006
}
}
"""
global_summaries = set([])
learning_rate_proto = optimizer_pb2.LearningRate()
text_format.Merge(learning_rate_text_proto, learning_rate_proto)
learning_rate = optimizer_builder._create_learning_rate(
learning_rate_proto, global_summaries)
self.assertTrue(isinstance(learning_rate, tf.Tensor))
def testRaiseErrorOnEmptyLearningRate(self):
learning_rate_text_proto = """
"""
global_summaries = set([])
learning_rate_proto = optimizer_pb2.LearningRate()
text_format.Merge(learning_rate_text_proto, learning_rate_proto)
with self.assertRaises(ValueError):
optimizer_builder._create_learning_rate(
learning_rate_proto, global_summaries)
class OptimizerBuilderTest(tf.test.TestCase):
def testBuildRMSPropOptimizer(self):
optimizer_text_proto = """
rms_prop_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
use_moving_average: false
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
self.assertTrue(isinstance(optimizer, tf.train.RMSPropOptimizer))
def testBuildMomentumOptimizer(self):
optimizer_text_proto = """
momentum_optimizer: {
learning_rate: {
constant_learning_rate {
learning_rate: 0.001
}
}
momentum_optimizer_value: 0.99
}
use_moving_average: false
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
self.assertTrue(isinstance(optimizer, tf.train.MomentumOptimizer))
def testBuildAdamOptimizer(self):
optimizer_text_proto = """
adam_optimizer: {
learning_rate: {
constant_learning_rate {
learning_rate: 0.002
}
}
}
use_moving_average: false
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
self.assertTrue(isinstance(optimizer, tf.train.AdamOptimizer))
def testBuildMovingAverageOptimizer(self):
optimizer_text_proto = """
adam_optimizer: {
learning_rate: {
constant_learning_rate {
learning_rate: 0.002
}
}
}
use_moving_average: True
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
self.assertTrue(
isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer))
def testBuildMovingAverageOptimizerWithNonDefaultDecay(self):
optimizer_text_proto = """
adam_optimizer: {
learning_rate: {
constant_learning_rate {
learning_rate: 0.002
}
}
}
use_moving_average: True
moving_average_decay: 0.2
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
self.assertTrue(
isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer))
# TODO: Find a way to not depend on the private members.
self.assertAlmostEqual(optimizer._ema._decay, 0.2)
def testBuildEmptyOptimizer(self):
optimizer_text_proto = """
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
with self.assertRaises(ValueError):
optimizer_builder.build(optimizer_proto, global_summaries)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Builder function for post processing operations."""
import functools
import tensorflow as tf
from object_detection.core import post_processing
from object_detection.protos import post_processing_pb2
def build(post_processing_config):
"""Builds callables for post-processing operations.
Builds callables for non-max suppression and score conversion based on the
configuration.
Non-max suppression callable takes `boxes`, `scores`, and optionally
`clip_window`, `parallel_iterations` and `scope` as inputs. It returns
`nms_boxes`, `nms_scores`, `nms_nms_classes` and `num_detections`. See
post_processing.batch_multiclass_non_max_suppression for the type and shape
of these tensors.
Score converter callable should be called with `input` tensor. The callable
returns the output from one of 3 tf operations based on the configuration -
tf.identity, tf.sigmoid or tf.nn.softmax. See tensorflow documentation for
argument and return value descriptions.
Args:
post_processing_config: post_processing.proto object containing the
parameters for the post-processing operations.
Returns:
non_max_suppressor_fn: Callable for non-max suppression.
score_converter_fn: Callable for score conversion.
Raises:
ValueError: if the post_processing_config is of incorrect type.
"""
if not isinstance(post_processing_config, post_processing_pb2.PostProcessing):
raise ValueError('post_processing_config not of type '
'post_processing_pb2.Postprocessing.')
non_max_suppressor_fn = _build_non_max_suppressor(
post_processing_config.batch_non_max_suppression)
score_converter_fn = _build_score_converter(
post_processing_config.score_converter)
return non_max_suppressor_fn, score_converter_fn
def _build_non_max_suppressor(nms_config):
"""Builds non-max suppresson based on the nms config.
Args:
nms_config: post_processing_pb2.PostProcessing.BatchNonMaxSuppression proto.
Returns:
non_max_suppressor_fn: Callable non-max suppressor.
Raises:
ValueError: On incorrect iou_threshold or on incompatible values of
max_total_detections and max_detections_per_class.
"""
if nms_config.iou_threshold < 0 or nms_config.iou_threshold > 1.0:
raise ValueError('iou_threshold not in [0, 1.0].')
if nms_config.max_detections_per_class > nms_config.max_total_detections:
raise ValueError('max_detections_per_class should be no greater than '
'max_total_detections.')
non_max_suppressor_fn = functools.partial(
post_processing.batch_multiclass_non_max_suppression,
score_thresh=nms_config.score_threshold,
iou_thresh=nms_config.iou_threshold,
max_size_per_class=nms_config.max_detections_per_class,
max_total_size=nms_config.max_total_detections)
return non_max_suppressor_fn
def _build_score_converter(score_converter_config):
"""Builds score converter based on the config.
Builds one of [tf.identity, tf.sigmoid, tf.softmax] score converters based on
the config.
Args:
score_converter_config: post_processing_pb2.PostProcessing.score_converter.
Returns:
Callable score converter op.
Raises:
ValueError: On unknown score converter.
"""
if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY:
return tf.identity
if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID:
return tf.sigmoid
if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX:
return tf.nn.softmax
raise ValueError('Unknown score converter.')
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for post_processing_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import post_processing_builder
from object_detection.protos import post_processing_pb2
class PostProcessingBuilderTest(tf.test.TestCase):
def test_build_non_max_suppressor_with_correct_parameters(self):
post_processing_text_proto = """
batch_non_max_suppression {
score_threshold: 0.7
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
non_max_suppressor, _ = post_processing_builder.build(
post_processing_config)
self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 100)
self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300)
self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7)
self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6)
def test_build_identity_score_converter(self):
post_processing_text_proto = """
score_converter: IDENTITY
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.identity)
def test_build_sigmoid_score_converter(self):
post_processing_text_proto = """
score_converter: SIGMOID
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.sigmoid)
def test_build_softmax_score_converter(self):
post_processing_text_proto = """
score_converter: SOFTMAX
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.nn.softmax)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Builder for preprocessing steps."""
import tensorflow as tf
from object_detection.core import preprocessor
from object_detection.protos import preprocessor_pb2
def _get_step_config_from_proto(preprocessor_step_config, step_name):
"""Returns the value of a field named step_name from proto.
Args:
preprocessor_step_config: A preprocessor_pb2.PreprocessingStep object.
step_name: Name of the field to get value from.
Returns:
result_dict: a sub proto message from preprocessor_step_config which will be
later converted to a dictionary.
Raises:
ValueError: If field does not exist in proto.
"""
for field, value in preprocessor_step_config.ListFields():
if field.name == step_name:
return value
raise ValueError('Could not get field %s from proto!', step_name)
def _get_dict_from_proto(config):
"""Helper function to put all proto fields into a dictionary.
For many preprocessing steps, there's an trivial 1-1 mapping from proto fields
to function arguments. This function automatically populates a dictionary with
the arguments from the proto.
Protos that CANNOT be trivially populated include:
* nested messages.
* steps that check if an optional field is set (ie. where None != 0).
* protos that don't map 1-1 to arguments (ie. list should be reshaped).
* fields requiring additional validation (ie. repeated field has n elements).
Args:
config: A protobuf object that does not violate the conditions above.
Returns:
result_dict: |config| converted into a python dictionary.
"""
result_dict = {}
for field, value in config.ListFields():
result_dict[field.name] = value
return result_dict
# A map from a PreprocessingStep proto config field name to the preprocessing
# function that should be used. The PreprocessingStep proto should be parsable
# with _get_dict_from_proto.
PREPROCESSING_FUNCTION_MAP = {
'normalize_image': preprocessor.normalize_image,
'random_horizontal_flip': preprocessor.random_horizontal_flip,
'random_pixel_value_scale': preprocessor.random_pixel_value_scale,
'random_image_scale': preprocessor.random_image_scale,
'random_rgb_to_gray': preprocessor.random_rgb_to_gray,
'random_adjust_brightness': preprocessor.random_adjust_brightness,
'random_adjust_contrast': preprocessor.random_adjust_contrast,
'random_adjust_hue': preprocessor.random_adjust_hue,
'random_adjust_saturation': preprocessor.random_adjust_saturation,
'random_distort_color': preprocessor.random_distort_color,
'random_jitter_boxes': preprocessor.random_jitter_boxes,
'random_crop_to_aspect_ratio': preprocessor.random_crop_to_aspect_ratio,
'random_black_patches': preprocessor.random_black_patches,
'scale_boxes_to_pixel_coordinates': (
preprocessor.scale_boxes_to_pixel_coordinates),
'subtract_channel_mean': preprocessor.subtract_channel_mean,
}
# A map to convert from preprocessor_pb2.ResizeImage.Method enum to
# tf.image.ResizeMethod.
RESIZE_METHOD_MAP = {
preprocessor_pb2.ResizeImage.AREA: tf.image.ResizeMethod.AREA,
preprocessor_pb2.ResizeImage.BICUBIC: tf.image.ResizeMethod.BICUBIC,
preprocessor_pb2.ResizeImage.BILINEAR: tf.image.ResizeMethod.BILINEAR,
preprocessor_pb2.ResizeImage.NEAREST_NEIGHBOR: (
tf.image.ResizeMethod.NEAREST_NEIGHBOR),
}
def build(preprocessor_step_config):
"""Builds preprocessing step based on the configuration.
Args:
preprocessor_step_config: PreprocessingStep configuration proto.
Returns:
function, argmap: A callable function and an argument map to call function
with.
Raises:
ValueError: On invalid configuration.
"""
step_type = preprocessor_step_config.WhichOneof('preprocessing_step')
if step_type in PREPROCESSING_FUNCTION_MAP:
preprocessing_function = PREPROCESSING_FUNCTION_MAP[step_type]
step_config = _get_step_config_from_proto(preprocessor_step_config,
step_type)
function_args = _get_dict_from_proto(step_config)
return (preprocessing_function, function_args)
if step_type == 'random_crop_image':
config = preprocessor_step_config.random_crop_image
return (preprocessor.random_crop_image,
{
'min_object_covered': config.min_object_covered,
'aspect_ratio_range': (config.min_aspect_ratio,
config.max_aspect_ratio),
'area_range': (config.min_area, config.max_area),
'overlap_thresh': config.overlap_thresh,
'random_coef': config.random_coef,
})
if step_type == 'random_pad_image':
config = preprocessor_step_config.random_pad_image
min_image_size = None
if (config.HasField('min_image_height') !=
config.HasField('min_image_width')):
raise ValueError('min_image_height and min_image_width should be either '
'both set or both unset.')
if config.HasField('min_image_height'):
min_image_size = (config.min_image_height, config.min_image_width)
max_image_size = None
if (config.HasField('max_image_height') !=
config.HasField('max_image_width')):
raise ValueError('max_image_height and max_image_width should be either '
'both set or both unset.')
if config.HasField('max_image_height'):
max_image_size = (config.max_image_height, config.max_image_width)
pad_color = config.pad_color
if pad_color and len(pad_color) != 3:
raise ValueError('pad_color should have 3 elements (RGB) if set!')
if not pad_color:
pad_color = None
return (preprocessor.random_pad_image,
{
'min_image_size': min_image_size,
'max_image_size': max_image_size,
'pad_color': pad_color,
})
if step_type == 'random_crop_pad_image':
config = preprocessor_step_config.random_crop_pad_image
min_padded_size_ratio = config.min_padded_size_ratio
if min_padded_size_ratio and len(min_padded_size_ratio) != 2:
raise ValueError('min_padded_size_ratio should have 3 elements if set!')
max_padded_size_ratio = config.max_padded_size_ratio
if max_padded_size_ratio and len(max_padded_size_ratio) != 2:
raise ValueError('max_padded_size_ratio should have 3 elements if set!')
pad_color = config.pad_color
if pad_color and len(pad_color) != 3:
raise ValueError('pad_color should have 3 elements if set!')
return (preprocessor.random_crop_pad_image,
{
'min_object_covered': config.min_object_covered,
'aspect_ratio_range': (config.min_aspect_ratio,
config.max_aspect_ratio),
'area_range': (config.min_area, config.max_area),
'overlap_thresh': config.overlap_thresh,
'random_coef': config.random_coef,
'min_padded_size_ratio': (min_padded_size_ratio if
min_padded_size_ratio else None),
'max_padded_size_ratio': (max_padded_size_ratio if
max_padded_size_ratio else None),
'pad_color': (pad_color if pad_color else None),
})
if step_type == 'random_resize_method':
config = preprocessor_step_config.random_resize_method
return (preprocessor.random_resize_method,
{
'target_size': [config.target_height, config.target_width],
})
if step_type == 'resize_image':
config = preprocessor_step_config.resize_image
method = RESIZE_METHOD_MAP[config.method]
return (preprocessor.resize_image,
{
'new_height': config.new_height,
'new_width': config.new_width,
'method': method
})
if step_type == 'ssd_random_crop':
config = preprocessor_step_config.ssd_random_crop
if config.operations:
min_object_covered = [op.min_object_covered for op in config.operations]
aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio)
for op in config.operations]
area_range = [(op.min_area, op.max_area) for op in config.operations]
overlap_thresh = [op.overlap_thresh for op in config.operations]
random_coef = [op.random_coef for op in config.operations]
return (preprocessor.ssd_random_crop,
{
'min_object_covered': min_object_covered,
'aspect_ratio_range': aspect_ratio_range,
'area_range': area_range,
'overlap_thresh': overlap_thresh,
'random_coef': random_coef,
})
return (preprocessor.ssd_random_crop, {})
if step_type == 'ssd_random_crop_pad':
config = preprocessor_step_config.ssd_random_crop_pad
if config.operations:
min_object_covered = [op.min_object_covered for op in config.operations]
aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio)
for op in config.operations]
area_range = [(op.min_area, op.max_area) for op in config.operations]
overlap_thresh = [op.overlap_thresh for op in config.operations]
random_coef = [op.random_coef for op in config.operations]
min_padded_size_ratio = [
(op.min_padded_size_ratio[0], op.min_padded_size_ratio[1])
for op in config.operations]
max_padded_size_ratio = [
(op.max_padded_size_ratio[0], op.max_padded_size_ratio[1])
for op in config.operations]
pad_color = [(op.pad_color_r, op.pad_color_g, op.pad_color_b)
for op in config.operations]
return (preprocessor.ssd_random_crop_pad,
{
'min_object_covered': min_object_covered,
'aspect_ratio_range': aspect_ratio_range,
'area_range': area_range,
'overlap_thresh': overlap_thresh,
'random_coef': random_coef,
'min_padded_size_ratio': min_padded_size_ratio,
'max_padded_size_ratio': max_padded_size_ratio,
'pad_color': pad_color,
})
return (preprocessor.ssd_random_crop_pad, {})
if step_type == 'ssd_random_crop_fixed_aspect_ratio':
config = preprocessor_step_config.ssd_random_crop_fixed_aspect_ratio
if config.operations:
min_object_covered = [op.min_object_covered for op in config.operations]
area_range = [(op.min_area, op.max_area) for op in config.operations]
overlap_thresh = [op.overlap_thresh for op in config.operations]
random_coef = [op.random_coef for op in config.operations]
return (preprocessor.ssd_random_crop_fixed_aspect_ratio,
{
'min_object_covered': min_object_covered,
'aspect_ratio': config.aspect_ratio,
'area_range': area_range,
'overlap_thresh': overlap_thresh,
'random_coef': random_coef,
})
return (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})
raise ValueError('Unknown preprocessing step.')
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for preprocessor_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import preprocessor_builder
from object_detection.core import preprocessor
from object_detection.protos import preprocessor_pb2
class PreprocessorBuilderTest(tf.test.TestCase):
def assert_dictionary_close(self, dict1, dict2):
"""Helper to check if two dicts with floatst or integers are close."""
self.assertEqual(sorted(dict1.keys()), sorted(dict2.keys()))
for key in dict1:
value = dict1[key]
if isinstance(value, float):
self.assertAlmostEqual(value, dict2[key])
else:
self.assertEqual(value, dict2[key])
def test_build_normalize_image(self):
preprocessor_text_proto = """
normalize_image {
original_minval: 0.0
original_maxval: 255.0
target_minval: -1.0
target_maxval: 1.0
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.normalize_image)
self.assertEqual(args, {
'original_minval': 0.0,
'original_maxval': 255.0,
'target_minval': -1.0,
'target_maxval': 1.0,
})
def test_build_random_horizontal_flip(self):
preprocessor_text_proto = """
random_horizontal_flip {
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_horizontal_flip)
self.assertEqual(args, {})
def test_build_random_pixel_value_scale(self):
preprocessor_text_proto = """
random_pixel_value_scale {
minval: 0.8
maxval: 1.2
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_pixel_value_scale)
self.assert_dictionary_close(args, {'minval': 0.8, 'maxval': 1.2})
def test_build_random_image_scale(self):
preprocessor_text_proto = """
random_image_scale {
min_scale_ratio: 0.8
max_scale_ratio: 2.2
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_image_scale)
self.assert_dictionary_close(args, {'min_scale_ratio': 0.8,
'max_scale_ratio': 2.2})
def test_build_random_rgb_to_gray(self):
preprocessor_text_proto = """
random_rgb_to_gray {
probability: 0.8
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_rgb_to_gray)
self.assert_dictionary_close(args, {'probability': 0.8})
def test_build_random_adjust_brightness(self):
preprocessor_text_proto = """
random_adjust_brightness {
max_delta: 0.2
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_adjust_brightness)
self.assert_dictionary_close(args, {'max_delta': 0.2})
def test_build_random_adjust_contrast(self):
preprocessor_text_proto = """
random_adjust_contrast {
min_delta: 0.7
max_delta: 1.1
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_adjust_contrast)
self.assert_dictionary_close(args, {'min_delta': 0.7, 'max_delta': 1.1})
def test_build_random_adjust_hue(self):
preprocessor_text_proto = """
random_adjust_hue {
max_delta: 0.01
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_adjust_hue)
self.assert_dictionary_close(args, {'max_delta': 0.01})
def test_build_random_adjust_saturation(self):
preprocessor_text_proto = """
random_adjust_saturation {
min_delta: 0.75
max_delta: 1.15
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_adjust_saturation)
self.assert_dictionary_close(args, {'min_delta': 0.75, 'max_delta': 1.15})
def test_build_random_distort_color(self):
preprocessor_text_proto = """
random_distort_color {
color_ordering: 1
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_distort_color)
self.assertEqual(args, {'color_ordering': 1})
def test_build_random_jitter_boxes(self):
preprocessor_text_proto = """
random_jitter_boxes {
ratio: 0.1
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_jitter_boxes)
self.assert_dictionary_close(args, {'ratio': 0.1})
def test_build_random_crop_image(self):
preprocessor_text_proto = """
random_crop_image {
min_object_covered: 0.75
min_aspect_ratio: 0.75
max_aspect_ratio: 1.5
min_area: 0.25
max_area: 0.875
overlap_thresh: 0.5
random_coef: 0.125
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_crop_image)
self.assertEqual(args, {
'min_object_covered': 0.75,
'aspect_ratio_range': (0.75, 1.5),
'area_range': (0.25, 0.875),
'overlap_thresh': 0.5,
'random_coef': 0.125,
})
def test_build_random_pad_image(self):
preprocessor_text_proto = """
random_pad_image {
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_pad_image)
self.assertEqual(args, {
'min_image_size': None,
'max_image_size': None,
'pad_color': None,
})
def test_build_random_crop_pad_image(self):
preprocessor_text_proto = """
random_crop_pad_image {
min_object_covered: 0.75
min_aspect_ratio: 0.75
max_aspect_ratio: 1.5
min_area: 0.25
max_area: 0.875
overlap_thresh: 0.5
random_coef: 0.125
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_crop_pad_image)
self.assertEqual(args, {
'min_object_covered': 0.75,
'aspect_ratio_range': (0.75, 1.5),
'area_range': (0.25, 0.875),
'overlap_thresh': 0.5,
'random_coef': 0.125,
'min_padded_size_ratio': None,
'max_padded_size_ratio': None,
'pad_color': None,
})
def test_build_random_crop_to_aspect_ratio(self):
preprocessor_text_proto = """
random_crop_to_aspect_ratio {
aspect_ratio: 0.85
overlap_thresh: 0.35
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_crop_to_aspect_ratio)
self.assert_dictionary_close(args, {'aspect_ratio': 0.85,
'overlap_thresh': 0.35})
def test_build_random_black_patches(self):
preprocessor_text_proto = """
random_black_patches {
max_black_patches: 20
probability: 0.95
size_to_image_ratio: 0.12
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_black_patches)
self.assert_dictionary_close(args, {'max_black_patches': 20,
'probability': 0.95,
'size_to_image_ratio': 0.12})
def test_build_random_resize_method(self):
preprocessor_text_proto = """
random_resize_method {
target_height: 75
target_width: 100
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_resize_method)
self.assert_dictionary_close(args, {'target_size': [75, 100]})
def test_build_scale_boxes_to_pixel_coordinates(self):
preprocessor_text_proto = """
scale_boxes_to_pixel_coordinates {}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.scale_boxes_to_pixel_coordinates)
self.assertEqual(args, {})
def test_build_resize_image(self):
preprocessor_text_proto = """
resize_image {
new_height: 75
new_width: 100
method: BICUBIC
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.resize_image)
self.assertEqual(args, {'new_height': 75,
'new_width': 100,
'method': tf.image.ResizeMethod.BICUBIC})
def test_build_subtract_channel_mean(self):
preprocessor_text_proto = """
subtract_channel_mean {
means: [1.0, 2.0, 3.0]
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.subtract_channel_mean)
self.assertEqual(args, {'means': [1.0, 2.0, 3.0]})
def test_build_ssd_random_crop(self):
preprocessor_text_proto = """
ssd_random_crop {
operations {
min_object_covered: 0.0
min_aspect_ratio: 0.875
max_aspect_ratio: 1.125
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.0
random_coef: 0.375
}
operations {
min_object_covered: 0.25
min_aspect_ratio: 0.75
max_aspect_ratio: 1.5
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.25
random_coef: 0.375
}
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.ssd_random_crop)
self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
'area_range': [(0.5, 1.0), (0.5, 1.0)],
'overlap_thresh': [0.0, 0.25],
'random_coef': [0.375, 0.375]})
def test_build_ssd_random_crop_empty_operations(self):
preprocessor_text_proto = """
ssd_random_crop {
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.ssd_random_crop)
self.assertEqual(args, {})
def test_build_ssd_random_crop_pad(self):
preprocessor_text_proto = """
ssd_random_crop_pad {
operations {
min_object_covered: 0.0
min_aspect_ratio: 0.875
max_aspect_ratio: 1.125
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.0
random_coef: 0.375
min_padded_size_ratio: [0.0, 0.0]
max_padded_size_ratio: [2.0, 2.0]
pad_color_r: 0.5
pad_color_g: 0.5
pad_color_b: 0.5
}
operations {
min_object_covered: 0.25
min_aspect_ratio: 0.75
max_aspect_ratio: 1.5
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.25
random_coef: 0.375
min_padded_size_ratio: [0.0, 0.0]
max_padded_size_ratio: [2.0, 2.0]
pad_color_r: 0.5
pad_color_g: 0.5
pad_color_b: 0.5
}
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.ssd_random_crop_pad)
self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
'area_range': [(0.5, 1.0), (0.5, 1.0)],
'overlap_thresh': [0.0, 0.25],
'random_coef': [0.375, 0.375],
'min_padded_size_ratio': [(0.0, 0.0), (0.0, 0.0)],
'max_padded_size_ratio': [(2.0, 2.0), (2.0, 2.0)],
'pad_color': [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5)]})
def test_build_ssd_random_crop_fixed_aspect_ratio(self):
preprocessor_text_proto = """
ssd_random_crop_fixed_aspect_ratio {
operations {
min_object_covered: 0.0
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.0
random_coef: 0.375
}
operations {
min_object_covered: 0.25
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.25
random_coef: 0.375
}
aspect_ratio: 0.875
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.ssd_random_crop_fixed_aspect_ratio)
self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
'aspect_ratio': 0.875,
'area_range': [(0.5, 1.0), (0.5, 1.0)],
'overlap_thresh': [0.0, 0.25],
'random_coef': [0.375, 0.375]})
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Builder for region similarity calculators."""
from object_detection.core import region_similarity_calculator
from object_detection.protos import region_similarity_calculator_pb2
def build(region_similarity_calculator_config):
"""Builds region similarity calculator based on the configuration.
Builds one of [IouSimilarity, IoaSimilarity, NegSqDistSimilarity] objects. See
core/region_similarity_calculator.proto for details.
Args:
region_similarity_calculator_config: RegionSimilarityCalculator
configuration proto.
Returns:
region_similarity_calculator: RegionSimilarityCalculator object.
Raises:
ValueError: On unknown region similarity calculator.
"""
if not isinstance(
region_similarity_calculator_config,
region_similarity_calculator_pb2.RegionSimilarityCalculator):
raise ValueError(
'region_similarity_calculator_config not of type '
'region_similarity_calculator_pb2.RegionsSimilarityCalculator')
similarity_calculator = region_similarity_calculator_config.WhichOneof(
'region_similarity')
if similarity_calculator == 'iou_similarity':
return region_similarity_calculator.IouSimilarity()
if similarity_calculator == 'ioa_similarity':
return region_similarity_calculator.IoaSimilarity()
if similarity_calculator == 'neg_sq_dist_similarity':
return region_similarity_calculator.NegSqDistSimilarity()
raise ValueError('Unknown region similarity calculator.')
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for region_similarity_calculator_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import region_similarity_calculator_builder
from object_detection.core import region_similarity_calculator
from object_detection.protos import region_similarity_calculator_pb2 as sim_calc_pb2
class RegionSimilarityCalculatorBuilderTest(tf.test.TestCase):
def testBuildIoaSimilarityCalculator(self):
similarity_calc_text_proto = """
ioa_similarity {
}
"""
similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
similarity_calc = region_similarity_calculator_builder.build(
similarity_calc_proto)
self.assertTrue(isinstance(similarity_calc,
region_similarity_calculator.IoaSimilarity))
def testBuildIouSimilarityCalculator(self):
similarity_calc_text_proto = """
iou_similarity {
}
"""
similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
similarity_calc = region_similarity_calculator_builder.build(
similarity_calc_proto)
self.assertTrue(isinstance(similarity_calc,
region_similarity_calculator.IouSimilarity))
def testBuildNegSqDistSimilarityCalculator(self):
similarity_calc_text_proto = """
neg_sq_dist_similarity {
}
"""
similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
similarity_calc = region_similarity_calculator_builder.build(
similarity_calc_proto)
self.assertTrue(isinstance(similarity_calc,
region_similarity_calculator.
NegSqDistSimilarity))
if __name__ == '__main__':
tf.test.main()
# Tensorflow Object Detection API: Core.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "batcher",
srcs = ["batcher.py"],
deps = [
":prefetcher",
":preprocessor",
":standard_fields",
"//tensorflow",
],
)
py_test(
name = "batcher_test",
srcs = ["batcher_test.py"],
deps = [
":batcher",
"//tensorflow",
],
)
py_library(
name = "box_list",
srcs = [
"box_list.py",
],
deps = [
"//tensorflow",
],
)
py_test(
name = "box_list_test",
srcs = ["box_list_test.py"],
deps = [
":box_list",
],
)
py_library(
name = "box_list_ops",
srcs = [
"box_list_ops.py",
],
deps = [
":box_list",
"//tensorflow",
"//tensorflow_models/object_detection/utils:shape_utils",
],
)
py_test(
name = "box_list_ops_test",
srcs = ["box_list_ops_test.py"],
deps = [
":box_list",
":box_list_ops",
],
)
py_library(
name = "box_coder",
srcs = [
"box_coder.py",
],
deps = [
"//tensorflow",
],
)
py_test(
name = "box_coder_test",
srcs = [
"box_coder_test.py",
],
deps = [
":box_coder",
":box_list",
"//tensorflow",
],
)
py_library(
name = "keypoint_ops",
srcs = [
"keypoint_ops.py",
],
deps = [
"//tensorflow",
],
)
py_test(
name = "keypoint_ops_test",
srcs = ["keypoint_ops_test.py"],
deps = [
":keypoint_ops",
],
)
py_library(
name = "losses",
srcs = ["losses.py"],
deps = [
":box_list",
":box_list_ops",
"//tensorflow",
"//tensorflow_models/object_detection/utils:ops",
],
)
py_library(
name = "matcher",
srcs = [
"matcher.py",
],
deps = [
],
)
py_library(
name = "model",
srcs = ["model.py"],
deps = [
":standard_fields",
],
)
py_test(
name = "matcher_test",
srcs = [
"matcher_test.py",
],
deps = [
":matcher",
"//tensorflow",
],
)
py_library(
name = "prefetcher",
srcs = ["prefetcher.py"],
deps = ["//tensorflow"],
)
py_library(
name = "preprocessor",
srcs = [
"preprocessor.py",
],
deps = [
":box_list",
":box_list_ops",
":keypoint_ops",
":standard_fields",
"//tensorflow",
],
)
py_test(
name = "preprocessor_test",
srcs = [
"preprocessor_test.py",
],
deps = [
":preprocessor",
"//tensorflow",
],
)
py_test(
name = "losses_test",
srcs = ["losses_test.py"],
deps = [
":box_list",
":losses",
":matcher",
"//tensorflow",
],
)
py_test(
name = "prefetcher_test",
srcs = ["prefetcher_test.py"],
deps = [
":prefetcher",
"//tensorflow",
],
)
py_library(
name = "standard_fields",
srcs = [
"standard_fields.py",
],
)
py_library(
name = "post_processing",
srcs = ["post_processing.py"],
deps = [
":box_list",
":box_list_ops",
":standard_fields",
"//tensorflow",
],
)
py_test(
name = "post_processing_test",
srcs = ["post_processing_test.py"],
deps = [
":box_list",
":box_list_ops",
":post_processing",
"//tensorflow",
],
)
py_library(
name = "target_assigner",
srcs = [
"target_assigner.py",
],
deps = [
":box_list",
":box_list_ops",
":matcher",
":region_similarity_calculator",
"//tensorflow",
"//tensorflow_models/object_detection/box_coders:faster_rcnn_box_coder",
"//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder",
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/matchers:argmax_matcher",
"//tensorflow_models/object_detection/matchers:bipartite_matcher",
],
)
py_test(
name = "target_assigner_test",
size = "large",
timeout = "long",
srcs = ["target_assigner_test.py"],
deps = [
":box_list",
":region_similarity_calculator",
":target_assigner",
"//tensorflow",
"//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder",
"//tensorflow_models/object_detection/matchers:bipartite_matcher",
],
)
py_library(
name = "data_decoder",
srcs = ["data_decoder.py"],
)
py_library(
name = "box_predictor",
srcs = ["box_predictor.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/object_detection/utils:static_shape",
],
)
py_test(
name = "box_predictor_test",
srcs = ["box_predictor_test.py"],
deps = [
":box_predictor",
"//tensorflow",
"//tensorflow_models/object_detection/builders:hyperparams_builder",
"//tensorflow_models/object_detection/protos:hyperparams_py_pb2",
],
)
py_library(
name = "region_similarity_calculator",
srcs = [
"region_similarity_calculator.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list_ops",
],
)
py_test(
name = "region_similarity_calculator_test",
srcs = [
"region_similarity_calculator_test.py",
],
deps = [
":region_similarity_calculator",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_library(
name = "anchor_generator",
srcs = [
"anchor_generator.py",
],
deps = [
"//tensorflow",
],
)
py_library(
name = "minibatch_sampler",
srcs = [
"minibatch_sampler.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/utils:ops",
],
)
py_test(
name = "minibatch_sampler_test",
srcs = [
"minibatch_sampler_test.py",
],
deps = [
":minibatch_sampler",
"//tensorflow",
],
)
py_library(
name = "balanced_positive_negative_sampler",
srcs = [
"balanced_positive_negative_sampler.py",
],
deps = [
":minibatch_sampler",
"//tensorflow",
],
)
py_test(
name = "balanced_positive_negative_sampler_test",
srcs = [
"balanced_positive_negative_sampler_test.py",
],
deps = [
":balanced_positive_negative_sampler",
"//tensorflow",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base anchor generator.
The job of the anchor generator is to create (or load) a collection
of bounding boxes to be used as anchors.
Generated anchors are assumed to match some convolutional grid or list of grid
shapes. For example, we might want to generate anchors matching an 8x8
feature map and a 4x4 feature map. If we place 3 anchors per grid location
on the first feature map and 6 anchors per grid location on the second feature
map, then 3*8*8 + 6*4*4 = 288 anchors are generated in total.
To support fully convolutional settings, feature map shapes are passed
dynamically at generation time. The number of anchors to place at each location
is static --- implementations of AnchorGenerator must always be able return
the number of anchors that it uses per location for each feature map.
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
class AnchorGenerator(object):
"""Abstract base class for anchor generators."""
__metaclass__ = ABCMeta
@abstractmethod
def name_scope(self):
"""Name scope.
Must be defined by implementations.
Returns:
a string representing the name scope of the anchor generation operation.
"""
pass
@property
def check_num_anchors(self):
"""Whether to dynamically check the number of anchors generated.
Can be overridden by implementations that would like to disable this
behavior.
Returns:
a boolean controlling whether the Generate function should dynamically
check the number of anchors generated against the mathematically
expected number of anchors.
"""
return True
@abstractmethod
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the `generate` function.
"""
pass
def generate(self, feature_map_shape_list, **params):
"""Generates a collection of bounding boxes to be used as anchors.
TODO: remove **params from argument list and make stride and offsets (for
multiple_grid_anchor_generator) constructor arguments.
Args:
feature_map_shape_list: list of (height, width) pairs in the format
[(height_0, width_0), (height_1, width_1), ...] that the generated
anchors must align with. Pairs can be provided as 1-dimensional
integer tensors of length 2 or simply as tuples of integers.
**params: parameters for anchor generation op
Returns:
boxes: a BoxList holding a collection of N anchor boxes
Raises:
ValueError: if the number of feature map shapes does not match the length
of NumAnchorsPerLocation.
"""
if self.check_num_anchors and (
len(feature_map_shape_list) != len(self.num_anchors_per_location())):
raise ValueError('Number of feature maps is expected to equal the length '
'of `num_anchors_per_location`.')
with tf.name_scope(self.name_scope()):
anchors = self._generate(feature_map_shape_list, **params)
if self.check_num_anchors:
with tf.control_dependencies([
self._assert_correct_number_of_anchors(
anchors, feature_map_shape_list)]):
anchors.set(tf.identity(anchors.get()))
return anchors
@abstractmethod
def _generate(self, feature_map_shape_list, **params):
"""To be overridden by implementations.
Args:
feature_map_shape_list: list of (height, width) pairs in the format
[(height_0, width_0), (height_1, width_1), ...] that the generated
anchors must align with.
**params: parameters for anchor generation op
Returns:
boxes: a BoxList holding a collection of N anchor boxes
"""
pass
def _assert_correct_number_of_anchors(self, anchors, feature_map_shape_list):
"""Assert that correct number of anchors was generated.
Args:
anchors: box_list.BoxList object holding anchors generated
feature_map_shape_list: list of (height, width) pairs in the format
[(height_0, width_0), (height_1, width_1), ...] that the generated
anchors must align with.
Returns:
Op that raises InvalidArgumentError if the number of anchors does not
match the number of expected anchors.
"""
expected_num_anchors = 0
for num_anchors_per_location, feature_map_shape in zip(
self.num_anchors_per_location(), feature_map_shape_list):
expected_num_anchors += (num_anchors_per_location
* feature_map_shape[0]
* feature_map_shape[1])
return tf.assert_equal(expected_num_anchors, anchors.num_boxes())
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Class to subsample minibatches by balancing positives and negatives.
Subsamples minibatches based on a pre-specified positive fraction in range
[0,1]. The class presumes there are many more negatives than positive examples:
if the desired batch_size cannot be achieved with the pre-specified positive
fraction, it fills the rest with negative examples. If this is not sufficient
for obtaining the desired batch_size, it returns fewer examples.
The main function to call is Subsample(self, indicator, labels). For convenience
one can also call SubsampleWeights(self, weights, labels) which is defined in
the minibatch_sampler base class.
"""
import tensorflow as tf
from object_detection.core import minibatch_sampler
class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
"""Subsamples minibatches to a desired balance of positives and negatives."""
def __init__(self, positive_fraction=0.5):
"""Constructs a minibatch sampler.
Args:
positive_fraction: desired fraction of positive examples (scalar in [0,1])
Raises:
ValueError: if positive_fraction < 0, or positive_fraction > 1
"""
if positive_fraction < 0 or positive_fraction > 1:
raise ValueError('positive_fraction should be in range [0,1]. '
'Received: %s.' % positive_fraction)
self._positive_fraction = positive_fraction
def subsample(self, indicator, batch_size, labels):
"""Returns subsampled minibatch.
Args:
indicator: boolean tensor of shape [N] whose True entries can be sampled.
batch_size: desired batch size.
labels: boolean tensor of shape [N] denoting positive(=True) and negative
(=False) examples.
Returns:
is_sampled: boolean tensor of shape [N], True for entries which are
sampled.
Raises:
ValueError: if labels and indicator are not 1D boolean tensors.
"""
if len(indicator.get_shape().as_list()) != 1:
raise ValueError('indicator must be 1 dimensional, got a tensor of '
'shape %s' % indicator.get_shape())
if len(labels.get_shape().as_list()) != 1:
raise ValueError('labels must be 1 dimensional, got a tensor of '
'shape %s' % labels.get_shape())
if labels.dtype != tf.bool:
raise ValueError('labels should be of type bool. Received: %s' %
labels.dtype)
if indicator.dtype != tf.bool:
raise ValueError('indicator should be of type bool. Received: %s' %
indicator.dtype)
# Only sample from indicated samples
negative_idx = tf.logical_not(labels)
positive_idx = tf.logical_and(labels, indicator)
negative_idx = tf.logical_and(negative_idx, indicator)
# Sample positive and negative samples separately
max_num_pos = int(self._positive_fraction * batch_size)
sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
max_num_neg = batch_size - tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32))
sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)
sampled_idx = tf.logical_or(sampled_pos_idx, sampled_neg_idx)
return sampled_idx
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.balanced_positive_negative_sampler."""
import numpy as np
import tensorflow as tf
from object_detection.core import balanced_positive_negative_sampler
class BalancedPositiveNegativeSamplerTest(tf.test.TestCase):
def test_subsample_all_examples(self):
numpy_labels = np.random.permutation(300)
indicator = tf.constant(np.ones(300) == 1)
numpy_labels = (numpy_labels - 200) > 0
labels = tf.constant(numpy_labels)
sampler = (balanced_positive_negative_sampler.
BalancedPositiveNegativeSampler())
is_sampled = sampler.subsample(indicator, 64, labels)
with self.test_session() as sess:
is_sampled = sess.run(is_sampled)
self.assertTrue(sum(is_sampled) == 64)
self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 32)
self.assertTrue(sum(np.logical_and(
np.logical_not(numpy_labels), is_sampled)) == 32)
def test_subsample_selection(self):
# Test random sampling when only some examples can be sampled:
# 100 samples, 20 positives, 10 positives cannot be sampled
numpy_labels = np.arange(100)
numpy_indicator = numpy_labels < 90
indicator = tf.constant(numpy_indicator)
numpy_labels = (numpy_labels - 80) >= 0
labels = tf.constant(numpy_labels)
sampler = (balanced_positive_negative_sampler.
BalancedPositiveNegativeSampler())
is_sampled = sampler.subsample(indicator, 64, labels)
with self.test_session() as sess:
is_sampled = sess.run(is_sampled)
self.assertTrue(sum(is_sampled) == 64)
self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10)
self.assertTrue(sum(np.logical_and(
np.logical_not(numpy_labels), is_sampled)) == 54)
self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
numpy_indicator))
def test_raises_error_with_incorrect_label_shape(self):
labels = tf.constant([[True, False, False]])
indicator = tf.constant([True, False, True])
sampler = (balanced_positive_negative_sampler.
BalancedPositiveNegativeSampler())
with self.assertRaises(ValueError):
sampler.subsample(indicator, 64, labels)
def test_raises_error_with_incorrect_indicator_shape(self):
labels = tf.constant([True, False, False])
indicator = tf.constant([[True, False, True]])
sampler = (balanced_positive_negative_sampler.
BalancedPositiveNegativeSampler())
with self.assertRaises(ValueError):
sampler.subsample(indicator, 64, labels)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides functions to batch a dictionary of input tensors."""
import collections
import tensorflow as tf
from object_detection.core import prefetcher
rt_shape_str = '_runtime_shapes'
class BatchQueue(object):
"""BatchQueue class.
This class creates a batch queue to asynchronously enqueue tensors_dict.
It also adds a FIFO prefetcher so that the batches are readily available
for the consumers. Dequeue ops for a BatchQueue object can be created via
the Dequeue method which evaluates to a batch of tensor_dict.
Example input pipeline with batching:
------------------------------------
key, string_tensor = slim.parallel_reader.parallel_read(...)
tensor_dict = decoder.decode(string_tensor)
tensor_dict = preprocessor.preprocess(tensor_dict, ...)
batch_queue = batcher.BatchQueue(tensor_dict,
batch_size=32,
batch_queue_capacity=2000,
num_batch_queue_threads=8,
prefetch_queue_capacity=20)
tensor_dict = batch_queue.dequeue()
outputs = Model(tensor_dict)
...
-----------------------------------
Notes:
-----
This class batches tensors of unequal sizes by zero padding and unpadding
them after generating a batch. This can be computationally expensive when
batching tensors (such as images) that are of vastly different sizes. So it is
recommended that the shapes of such tensors be fully defined in tensor_dict
while other lightweight tensors such as bounding box corners and class labels
can be of varying sizes. Use either crop or resize operations to fully define
the shape of an image in tensor_dict.
It is also recommended to perform any preprocessing operations on tensors
before passing to BatchQueue and subsequently calling the Dequeue method.
Another caveat is that this class does not read the last batch if it is not
full. The current implementation makes it hard to support that use case. So,
for evaluation, when it is critical to run all the examples through your
network use the input pipeline example mentioned in core/prefetcher.py.
"""
def __init__(self, tensor_dict, batch_size, batch_queue_capacity,
num_batch_queue_threads, prefetch_queue_capacity):
"""Constructs a batch queue holding tensor_dict.
Args:
tensor_dict: dictionary of tensors to batch.
batch_size: batch size.
batch_queue_capacity: max capacity of the queue from which the tensors are
batched.
num_batch_queue_threads: number of threads to use for batching.
prefetch_queue_capacity: max capacity of the queue used to prefetch
assembled batches.
"""
# Remember static shapes to set shapes of batched tensors.
static_shapes = collections.OrderedDict(
{key: tensor.get_shape() for key, tensor in tensor_dict.items()})
# Remember runtime shapes to unpad tensors after batching.
runtime_shapes = collections.OrderedDict(
{(key + rt_shape_str): tf.shape(tensor)
for key, tensor in tensor_dict.items()})
all_tensors = tensor_dict
all_tensors.update(runtime_shapes)
batched_tensors = tf.train.batch(
all_tensors,
capacity=batch_queue_capacity,
batch_size=batch_size,
dynamic_pad=True,
num_threads=num_batch_queue_threads)
self._queue = prefetcher.prefetch(batched_tensors,
prefetch_queue_capacity)
self._static_shapes = static_shapes
self._batch_size = batch_size
def dequeue(self):
"""Dequeues a batch of tensor_dict from the BatchQueue.
TODO: use allow_smaller_final_batch to allow running over the whole eval set
Returns:
A list of tensor_dicts of the requested batch_size.
"""
batched_tensors = self._queue.dequeue()
# Separate input tensors from tensors containing their runtime shapes.
tensors = {}
shapes = {}
for key, batched_tensor in batched_tensors.items():
unbatched_tensor_list = tf.unstack(batched_tensor)
for i, unbatched_tensor in enumerate(unbatched_tensor_list):
if rt_shape_str in key:
shapes[(key[:-len(rt_shape_str)], i)] = unbatched_tensor
else:
tensors[(key, i)] = unbatched_tensor
# Undo that padding using shapes and create a list of size `batch_size` that
# contains tensor dictionaries.
tensor_dict_list = []
batch_size = self._batch_size
for batch_id in range(batch_size):
tensor_dict = {}
for key in self._static_shapes:
tensor_dict[key] = tf.slice(tensors[(key, batch_id)],
tf.zeros_like(shapes[(key, batch_id)]),
shapes[(key, batch_id)])
tensor_dict[key].set_shape(self._static_shapes[key])
tensor_dict_list.append(tensor_dict)
return tensor_dict_list
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.batcher."""
import numpy as np
import tensorflow as tf
from object_detection.core import batcher
slim = tf.contrib.slim
class BatcherTest(tf.test.TestCase):
def test_batch_and_unpad_2d_tensors_of_different_sizes_in_1st_dimension(self):
with self.test_session() as sess:
batch_size = 3
num_batches = 2
examples = tf.Variable(tf.constant(2, dtype=tf.int32))
counter = examples.count_up_to(num_batches * batch_size + 2)
boxes = tf.tile(
tf.reshape(tf.range(4), [1, 4]), tf.stack([counter, tf.constant(1)]))
batch_queue = batcher.BatchQueue(
tensor_dict={'boxes': boxes},
batch_size=batch_size,
batch_queue_capacity=100,
num_batch_queue_threads=1,
prefetch_queue_capacity=100)
batch = batch_queue.dequeue()
for tensor_dict in batch:
for tensor in tensor_dict.values():
self.assertAllEqual([None, 4], tensor.get_shape().as_list())
tf.initialize_all_variables().run()
with slim.queues.QueueRunners(sess):
i = 2
for _ in range(num_batches):
batch_np = sess.run(batch)
for tensor_dict in batch_np:
for tensor in tensor_dict.values():
self.assertAllEqual(tensor, np.tile(np.arange(4), (i, 1)))
i += 1
with self.assertRaises(tf.errors.OutOfRangeError):
sess.run(batch)
def test_batch_and_unpad_2d_tensors_of_different_sizes_in_all_dimensions(
self):
with self.test_session() as sess:
batch_size = 3
num_batches = 2
examples = tf.Variable(tf.constant(2, dtype=tf.int32))
counter = examples.count_up_to(num_batches * batch_size + 2)
image = tf.reshape(
tf.range(counter * counter), tf.stack([counter, counter]))
batch_queue = batcher.BatchQueue(
tensor_dict={'image': image},
batch_size=batch_size,
batch_queue_capacity=100,
num_batch_queue_threads=1,
prefetch_queue_capacity=100)
batch = batch_queue.dequeue()
for tensor_dict in batch:
for tensor in tensor_dict.values():
self.assertAllEqual([None, None], tensor.get_shape().as_list())
tf.initialize_all_variables().run()
with slim.queues.QueueRunners(sess):
i = 2
for _ in range(num_batches):
batch_np = sess.run(batch)
for tensor_dict in batch_np:
for tensor in tensor_dict.values():
self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i)))
i += 1
with self.assertRaises(tf.errors.OutOfRangeError):
sess.run(batch)
def test_batch_and_unpad_2d_tensors_of_same_size_in_all_dimensions(self):
with self.test_session() as sess:
batch_size = 3
num_batches = 2
examples = tf.Variable(tf.constant(1, dtype=tf.int32))
counter = examples.count_up_to(num_batches * batch_size + 1)
image = tf.reshape(tf.range(1, 13), [4, 3]) * counter
batch_queue = batcher.BatchQueue(
tensor_dict={'image': image},
batch_size=batch_size,
batch_queue_capacity=100,
num_batch_queue_threads=1,
prefetch_queue_capacity=100)
batch = batch_queue.dequeue()
for tensor_dict in batch:
for tensor in tensor_dict.values():
self.assertAllEqual([4, 3], tensor.get_shape().as_list())
tf.initialize_all_variables().run()
with slim.queues.QueueRunners(sess):
i = 1
for _ in range(num_batches):
batch_np = sess.run(batch)
for tensor_dict in batch_np:
for tensor in tensor_dict.values():
self.assertAllEqual(tensor, np.arange(1, 13).reshape((4, 3)) * i)
i += 1
with self.assertRaises(tf.errors.OutOfRangeError):
sess.run(batch)
def test_batcher_when_batch_size_is_one(self):
with self.test_session() as sess:
batch_size = 1
num_batches = 2
examples = tf.Variable(tf.constant(2, dtype=tf.int32))
counter = examples.count_up_to(num_batches * batch_size + 2)
image = tf.reshape(
tf.range(counter * counter), tf.stack([counter, counter]))
batch_queue = batcher.BatchQueue(
tensor_dict={'image': image},
batch_size=batch_size,
batch_queue_capacity=100,
num_batch_queue_threads=1,
prefetch_queue_capacity=100)
batch = batch_queue.dequeue()
for tensor_dict in batch:
for tensor in tensor_dict.values():
self.assertAllEqual([None, None], tensor.get_shape().as_list())
tf.initialize_all_variables().run()
with slim.queues.QueueRunners(sess):
i = 2
for _ in range(num_batches):
batch_np = sess.run(batch)
for tensor_dict in batch_np:
for tensor in tensor_dict.values():
self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i)))
i += 1
with self.assertRaises(tf.errors.OutOfRangeError):
sess.run(batch)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base box coder.
Box coders convert between coordinate frames, namely image-centric
(with (0,0) on the top left of image) and anchor-centric (with (0,0) being
defined by a specific anchor).
Users of a BoxCoder can call two methods:
encode: which encodes a box with respect to a given anchor
(or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
decode: which inverts this encoding with a decode operation.
In both cases, the arguments are assumed to be in 1-1 correspondence already;
it is not the job of a BoxCoder to perform matching.
"""
from abc import ABCMeta
from abc import abstractmethod
from abc import abstractproperty
import tensorflow as tf
# Box coder types.
FASTER_RCNN = 'faster_rcnn'
KEYPOINT = 'keypoint'
MEAN_STDDEV = 'mean_stddev'
SQUARE = 'square'
class BoxCoder(object):
"""Abstract base class for box coder."""
__metaclass__ = ABCMeta
@abstractproperty
def code_size(self):
"""Return the size of each code.
This number is a constant and should agree with the output of the `encode`
op (e.g. if rel_codes is the output of self.encode(...), then it should have
shape [N, code_size()]). This abstractproperty should be overridden by
implementations.
Returns:
an integer constant
"""
pass
def encode(self, boxes, anchors):
"""Encode a box list relative to an anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded
anchors: BoxList of N anchors
Returns:
a tensor representing N relative-encoded boxes
"""
with tf.name_scope('Encode'):
return self._encode(boxes, anchors)
def decode(self, rel_codes, anchors):
"""Decode boxes that are encoded relative to an anchor collection.
Args:
rel_codes: a tensor representing N relative-encoded boxes
anchors: BoxList of anchors
Returns:
boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
with corners y_min, x_min, y_max, x_max)
"""
with tf.name_scope('Decode'):
return self._decode(rel_codes, anchors)
@abstractmethod
def _encode(self, boxes, anchors):
"""Method to be overriden by implementations.
Args:
boxes: BoxList holding N boxes to be encoded
anchors: BoxList of N anchors
Returns:
a tensor representing N relative-encoded boxes
"""
pass
@abstractmethod
def _decode(self, rel_codes, anchors):
"""Method to be overriden by implementations.
Args:
rel_codes: a tensor representing N relative-encoded boxes
anchors: BoxList of anchors
Returns:
boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
with corners y_min, x_min, y_max, x_max)
"""
pass
def batch_decode(encoded_boxes, box_coder, anchors):
"""Decode a batch of encoded boxes.
This op takes a batch of encoded bounding boxes and transforms
them to a batch of bounding boxes specified by their corners in
the order of [y_min, x_min, y_max, x_max].
Args:
encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
code_size] representing the location of the objects.
box_coder: a BoxCoder object.
anchors: a BoxList of anchors used to encode `encoded_boxes`.
Returns:
decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
coder_size] representing the corners of the objects in the order
of [y_min, x_min, y_max, x_max].
Raises:
ValueError: if batch sizes of the inputs are inconsistent, or if
the number of anchors inferred from encoded_boxes and anchors are
inconsistent.
"""
encoded_boxes.get_shape().assert_has_rank(3)
if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
raise ValueError('The number of anchors inferred from encoded_boxes'
' and anchors are inconsistent: shape[1] of encoded_boxes'
' %s should be equal to the number of anchors: %s.' %
(encoded_boxes.get_shape()[1].value,
anchors.num_boxes_static()))
decoded_boxes = tf.stack([
box_coder.decode(boxes, anchors).get()
for boxes in tf.unstack(encoded_boxes)
])
return decoded_boxes
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_coder."""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
class MockBoxCoder(box_coder.BoxCoder):
"""Test BoxCoder that encodes/decodes using the multiply-by-two function."""
def code_size(self):
return 4
def _encode(self, boxes, anchors):
return 2.0 * boxes.get()
def _decode(self, rel_codes, anchors):
return box_list.BoxList(rel_codes / 2.0)
class BoxCoderTest(tf.test.TestCase):
def test_batch_decode(self):
mock_anchor_corners = tf.constant(
[[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32)
mock_anchors = box_list.BoxList(mock_anchor_corners)
mock_box_coder = MockBoxCoder()
expected_boxes = [[[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]],
[[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]]]
encoded_boxes_list = [mock_box_coder.encode(
box_list.BoxList(tf.constant(boxes)), mock_anchors)
for boxes in expected_boxes]
encoded_boxes = tf.stack(encoded_boxes_list)
decoded_boxes = box_coder.batch_decode(
encoded_boxes, mock_box_coder, mock_anchors)
with self.test_session() as sess:
decoded_boxes_result = sess.run(decoded_boxes)
self.assertAllClose(expected_boxes, decoded_boxes_result)
if __name__ == '__main__':
tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment