Unverified Commit 70255908 authored by pkulzc, committed by GitHub

Object detection Internal Changes. (#4757)

* Merged commit includes the following changes:
204316992  by Zhichao Lu:

    Update docs to prepare inputs

--
204309254  by Zhichao Lu:

    Update running_pets.md to use new binaries and correct a few things in running_on_cloud.md

--
204306734  by Zhichao Lu:

    Move old binaries into legacy folder and add deprecation notice.

--
204267757  by Zhichao Lu:

    Fix a problem in VRD evaluation where ground truth annotations were
    missing for images that do not contain objects from the 62 groundtruth
    classes.

--
204167430  by Zhichao Lu:

    This fixes a flaky losses test failure.

--
203670721  by Zhichao Lu:

    Internal change.

--
203569388  by Zhichao Lu:

    Internal change.

--
203546580  by Zhichao Lu:

    * Expand TPU compatibility g3doc with config snippets
    * Change mscoco dataset path in sample configs to the sharded versions

--
203325694  by Zhichao Lu:

    Make merge_multiple_label_boxes work for the model_main code path.

--
203305655  by Zhichao Lu:

    Remove the 1x1 conv layer before pooling in MobileNet-v1-PPN feature extractor.

--
203139608  by Zhichao Lu:

    - Support exponential_decay with burnin learning rate schedule.
    - Add the minimum learning rate option.
    - Make the exponential decay start only after the burnin steps.

--
203068703  by Zhichao Lu:

    Modify create_coco_tf_record.py to output sharded files.

--
203025308  by Zhichao Lu:

    Add an option to share the prediction tower in WeightSharedBoxPredictor.

--
203024942  by Zhichao Lu:

    Move ssd mobilenet v1 ppn configs to third party.

--
202901259  by Zhichao Lu:

    Delete obsolete ssd mobilenet v1 focal loss configs and update the pets dataset path.

--
202894154  by Zhichao Lu:

    Move all TPU compatible ssd mobilenet v1 coco14/pet configs to third party.

--
202861774  by Zhichao Lu:

    Move Retinanet (SSD + FPN + Shared box predictor) configs to third_party.

--

PiperOrigin-RevId: 204316992

* Add original files back.
parent ee6fdda1
@@ -39,15 +39,18 @@ https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.go
## Table of contents
+Setup:
+* <a href='g3doc/installation.md'>Installation</a><br>
Quick Start:
* <a href='object_detection_tutorial.ipynb'>
Quick Start: Jupyter notebook for off-the-shelf inference</a><br>
* <a href="g3doc/running_pets.md">Quick Start: Training a pet detector</a><br>
-Setup:
-* <a href='g3doc/installation.md'>Installation</a><br>
+Customizing a Pipeline:
* <a href='g3doc/configuring_jobs.md'>
Configuring an object detection pipeline</a><br>
* <a href='g3doc/preparing_inputs.md'>Preparing inputs</a><br>
@@ -73,7 +76,7 @@ Extras:
* <a href='g3doc/instance_segmentation.md'>
Run an instance segmentation model</a><br>
* <a href='g3doc/challenge_evaluation.md'>
-Run the evaluation for the Open Images Challenge 2018.</a><br>
+Run the evaluation for the Open Images Challenge 2018</a><br>

## Getting Help
......
@@ -87,7 +87,8 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
        class_prediction_bias_init=conv_box_predictor.
        class_prediction_bias_init,
        use_dropout=conv_box_predictor.use_dropout,
-        dropout_keep_prob=conv_box_predictor.dropout_keep_probability)
+        dropout_keep_prob=conv_box_predictor.dropout_keep_probability,
+        share_prediction_tower=conv_box_predictor.share_prediction_tower)
    return box_predictor_object
  if box_predictor_oneof == 'mask_rcnn_box_predictor':
......
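Since the builder reads the new field straight off the weight_shared_convolutional_box_predictor message, share_prediction_tower can be toggled from a pipeline config. A minimal sketch of such a fragment, with the surrounding values borrowed from the test configs later in this commit (illustrative, not prescribed defaults):

box_predictor {
  weight_shared_convolutional_box_predictor {
    share_prediction_tower: true  # new field introduced by this change
    depth: 1024                   # illustrative values below
    num_layers_before_predictor: 2
    kernel_size: 1
    conv_hyperparams {
      regularizer { l2_regularizer { weight: 0.0004 } }
      initializer { variance_scaling_initializer { } }
    }
  }
}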
@@ -33,10 +33,13 @@ from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_n
from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
+from object_detection.models import ssd_resnet_v1_ppn_feature_extractor as ssd_resnet_v1_ppn
from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
+from object_detection.models.ssd_mobilenet_v1_fpn_feature_extractor import SSDMobileNetV1FpnFeatureExtractor
+from object_detection.models.ssd_mobilenet_v1_ppn_feature_extractor import SSDMobileNetV1PpnFeatureExtractor
from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor
from object_detection.protos import model_pb2
@@ -45,10 +48,17 @@ SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
    'ssd_inception_v2': SSDInceptionV2FeatureExtractor,
    'ssd_inception_v3': SSDInceptionV3FeatureExtractor,
    'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor,
+    'ssd_mobilenet_v1_fpn': SSDMobileNetV1FpnFeatureExtractor,
+    'ssd_mobilenet_v1_ppn': SSDMobileNetV1PpnFeatureExtractor,
    'ssd_mobilenet_v2': SSDMobileNetV2FeatureExtractor,
    'ssd_resnet50_v1_fpn': ssd_resnet_v1_fpn.SSDResnet50V1FpnFeatureExtractor,
    'ssd_resnet101_v1_fpn': ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor,
    'ssd_resnet152_v1_fpn': ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor,
+    'ssd_resnet50_v1_ppn': ssd_resnet_v1_ppn.SSDResnet50V1PpnFeatureExtractor,
+    'ssd_resnet101_v1_ppn':
+        ssd_resnet_v1_ppn.SSDResnet101V1PpnFeatureExtractor,
+    'ssd_resnet152_v1_ppn':
+        ssd_resnet_v1_ppn.SSDResnet152V1PpnFeatureExtractor,
    'embedded_ssd_mobilenet_v1': EmbeddedSSDMobileNetV1FeatureExtractor,
}
@@ -327,6 +337,8 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
          second_stage_classification_loss_weight,
          second_stage_localization_loss_weight)

+  use_matmul_crop_and_resize = (frcnn_config.use_matmul_crop_and_resize)
+
  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
@@ -360,7 +372,9 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
      'second_stage_classification_loss_weight':
          second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
-      'add_summaries': add_summaries}
+      'add_summaries': add_summaries,
+      'use_matmul_crop_and_resize': use_matmul_crop_and_resize
+  }
  if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
......
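The use_matmul_crop_and_resize flag swaps tf.image.crop_and_resize for a matmul-based equivalent, which matters on TPU where the native op is unavailable. A minimal numpy sketch of the underlying idea, assuming a single 2-D feature map and bilinear sampling expressed as two matrix products (names and signature here are illustrative, not the library's):

import numpy as np

def interpolation_matrix(num_out, lo, hi, size_in):
  # Each output row holds the bilinear weights of one output sample over
  # the size_in input positions; box coordinates lo/hi are in [0, 1].
  weights = np.zeros((num_out, size_in))
  positions = (lo + np.linspace(0.0, 1.0, num_out) * (hi - lo)) * (size_in - 1)
  low = np.clip(np.floor(positions).astype(int), 0, size_in - 1)
  high = np.clip(low + 1, 0, size_in - 1)
  frac = positions - low
  weights[np.arange(num_out), low] += 1.0 - frac
  weights[np.arange(num_out), high] += frac
  return weights

def matmul_crop_and_resize(image, box, crop_size):
  # Bilinear crop-and-resize of a [height, width] feature map via two
  # matrix products: rows interpolate vertically, cols horizontally.
  y1, x1, y2, x2 = box
  rows = interpolation_matrix(crop_size[0], y1, y2, image.shape[0])
  cols = interpolation_matrix(crop_size[1], x1, x2, image.shape[1])
  return rows @ image @ cols.T

feature_map = np.arange(16.0).reshape(4, 4)
print(matmul_crop_and_resize(feature_map, (0.0, 0.0, 1.0, 1.0), (2, 2)))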
@@ -28,10 +28,13 @@ from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_n
from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
+from object_detection.models import ssd_resnet_v1_ppn_feature_extractor as ssd_resnet_v1_ppn
from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
+from object_detection.models.ssd_mobilenet_v1_fpn_feature_extractor import SSDMobileNetV1FpnFeatureExtractor
+from object_detection.models.ssd_mobilenet_v1_ppn_feature_extractor import SSDMobileNetV1PpnFeatureExtractor
from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor
from object_detection.protos import model_pb2
@@ -50,7 +53,22 @@ SSD_RESNET_V1_FPN_FEAT_MAPS = {
    'ssd_resnet101_v1_fpn':
        ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor,
    'ssd_resnet152_v1_fpn':
-        ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor
+        ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor,
+    'ssd_resnet50_v1_ppn':
+        ssd_resnet_v1_ppn.SSDResnet50V1PpnFeatureExtractor,
+    'ssd_resnet101_v1_ppn':
+        ssd_resnet_v1_ppn.SSDResnet101V1PpnFeatureExtractor,
+    'ssd_resnet152_v1_ppn':
+        ssd_resnet_v1_ppn.SSDResnet152V1PpnFeatureExtractor
+}
+
+SSD_RESNET_V1_PPN_FEAT_MAPS = {
+    'ssd_resnet50_v1_ppn':
+        ssd_resnet_v1_ppn.SSDResnet50V1PpnFeatureExtractor,
+    'ssd_resnet101_v1_ppn':
+        ssd_resnet_v1_ppn.SSDResnet101V1PpnFeatureExtractor,
+    'ssd_resnet152_v1_ppn':
+        ssd_resnet_v1_ppn.SSDResnet152V1PpnFeatureExtractor
}
@@ -296,6 +314,87 @@ class ModelBuilderTest(tf.test.TestCase):
      self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
      self.assertIsInstance(model._feature_extractor, extractor_class)
def test_create_ssd_resnet_v1_ppn_model_from_config(self):
model_text_proto = """
ssd {
feature_extractor {
type: 'ssd_resnet_v1_50_ppn'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
box_coder {
mean_stddev_box_coder {
}
}
matcher {
bipartite_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 1024
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
variance_scaling_initializer {
}
}
}
num_layers_before_predictor: 2
kernel_size: 1
}
}
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_l2 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
for extractor_type, extractor_class in SSD_RESNET_V1_PPN_FEAT_MAPS.items():
model_proto.ssd.feature_extractor.type = extractor_type
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor, extractor_class)
  def test_create_ssd_mobilenet_v1_model_from_config(self):
    model_text_proto = """
      ssd {
@@ -373,6 +472,160 @@ class ModelBuilderTest(tf.test.TestCase):
    self.assertTrue(model._freeze_batchnorm)
    self.assertTrue(model._inplace_batchnorm_update)
def test_create_ssd_mobilenet_v1_fpn_model_from_config(self):
model_text_proto = """
ssd {
freeze_batchnorm: true
inplace_batchnorm_update: true
feature_extractor {
type: 'ssd_mobilenet_v1_fpn'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
normalize_loc_loss_by_codesize: true
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = self.create_model(model_proto)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDMobileNetV1FpnFeatureExtractor)
self.assertTrue(model._normalize_loc_loss_by_codesize)
self.assertTrue(model._freeze_batchnorm)
self.assertTrue(model._inplace_batchnorm_update)
def test_create_ssd_mobilenet_v1_ppn_model_from_config(self):
model_text_proto = """
ssd {
freeze_batchnorm: true
inplace_batchnorm_update: true
feature_extractor {
type: 'ssd_mobilenet_v1_ppn'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
normalize_loc_loss_by_codesize: true
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = self.create_model(model_proto)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDMobileNetV1PpnFeatureExtractor)
self.assertTrue(model._normalize_loc_loss_by_codesize)
self.assertTrue(model._freeze_batchnorm)
self.assertTrue(model._inplace_batchnorm_update)
  def test_create_ssd_mobilenet_v2_model_from_config(self):
    model_text_proto = """
      ssd {
......
@@ -90,12 +90,15 @@ def _create_learning_rate(learning_rate_config):
  if learning_rate_type == 'exponential_decay_learning_rate':
    config = learning_rate_config.exponential_decay_learning_rate
-    learning_rate = tf.train.exponential_decay(
-        config.initial_learning_rate,
-        tf.train.get_or_create_global_step(),
-        config.decay_steps,
-        config.decay_factor,
-        staircase=config.staircase, name='learning_rate')
+    learning_rate = learning_schedules.exponential_decay_with_burnin(
+        tf.train.get_or_create_global_step(),
+        config.initial_learning_rate,
+        config.decay_steps,
+        config.decay_factor,
+        burnin_learning_rate=config.burnin_learning_rate,
+        burnin_steps=config.burnin_steps,
+        min_learning_rate=config.min_learning_rate,
+        staircase=config.staircase)

  if learning_rate_type == 'manual_step_learning_rate':
    config = learning_rate_config.manual_step_learning_rate
......
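For intuition, here is what a schedule like exponential_decay_with_burnin plausibly computes, per the commit log above (a hedged sketch of the semantics, not the library's implementation; argument names mirror the config fields):

import numpy as np

def exponential_decay_with_burnin_sketch(step, initial_learning_rate,
                                         decay_steps, decay_factor,
                                         burnin_learning_rate=0.0,
                                         burnin_steps=0,
                                         min_learning_rate=0.0,
                                         staircase=True):
  # Hold a constant burn-in rate for the first burnin_steps steps.
  if step < burnin_steps:
    return burnin_learning_rate
  # Exponential decay starts counting only after the burn-in window.
  exponent = (step - burnin_steps) / decay_steps
  if staircase:
    exponent = np.floor(exponent)
  # Clip so the rate never decays below the configured minimum.
  return max(initial_learning_rate * decay_factor ** exponent,
             min_learning_rate)

for step in (0, 999, 1000, 9000, 90000):
  print(step, exponential_decay_with_burnin_sketch(
      step, initial_learning_rate=0.004, decay_steps=4000, decay_factor=0.95,
      burnin_learning_rate=0.001, burnin_steps=1000, min_learning_rate=1e-05))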
@@ -802,7 +802,8 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
               kernel_size=3,
               class_prediction_bias_init=0.0,
               use_dropout=False,
-               dropout_keep_prob=0.8):
+               dropout_keep_prob=0.8,
+               share_prediction_tower=False):
    """Constructor.

    Args:
@@ -822,6 +823,8 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
        conv2d layer before class prediction.
      use_dropout: Whether to apply dropout to class prediction head.
      dropout_keep_prob: Probability of keeping activations.
+      share_prediction_tower: Whether to share the multi-layer tower between
+        box prediction and class prediction heads.
    """
    super(WeightSharedConvolutionalBoxPredictor, self).__init__(is_training,
                                                                num_classes)
@@ -833,6 +836,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
    self._class_prediction_bias_init = class_prediction_bias_init
    self._use_dropout = use_dropout
    self._dropout_keep_prob = dropout_keep_prob
+    self._share_prediction_tower = share_prediction_tower

  def _predict(self, image_features, num_predictions_per_location_list):
    """Computes encoded object locations and corresponding confidences.
@@ -912,6 +916,9 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
      box_encodings_net = image_feature
      class_predictions_net = image_feature
      for i in range(self._num_layers_before_predictor):
+        box_prediction_tower_prefix = (
+            'PredictionTower' if self._share_prediction_tower
+            else 'BoxPredictionTower')
        box_encodings_net = slim.conv2d(
            box_encodings_net,
            self._depth,
@@ -920,12 +927,12 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
            padding='SAME',
            activation_fn=None,
            normalizer_fn=(tf.identity if apply_batch_norm else None),
-            scope='BoxPredictionTower/conv2d_{}'.format(i))
+            scope='{}/conv2d_{}'.format(box_prediction_tower_prefix, i))
        if apply_batch_norm:
          box_encodings_net = slim.batch_norm(
              box_encodings_net,
-              scope='BoxPredictionTower/conv2d_{}/BatchNorm/feature_{}'.
-              format(i, feature_index))
+              scope='{}/conv2d_{}/BatchNorm/feature_{}'.
+              format(box_prediction_tower_prefix, i, feature_index))
        box_encodings_net = tf.nn.relu6(box_encodings_net)
      box_encodings = slim.conv2d(
          box_encodings_net,
@@ -935,22 +942,25 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
          normalizer_fn=None,
          scope='BoxPredictor')

-      for i in range(self._num_layers_before_predictor):
-        class_predictions_net = slim.conv2d(
-            class_predictions_net,
-            self._depth,
-            [self._kernel_size, self._kernel_size],
-            stride=1,
-            padding='SAME',
-            activation_fn=None,
-            normalizer_fn=(tf.identity if apply_batch_norm else None),
-            scope='ClassPredictionTower/conv2d_{}'.format(i))
-        if apply_batch_norm:
-          class_predictions_net = slim.batch_norm(
-              class_predictions_net,
-              scope='ClassPredictionTower/conv2d_{}/BatchNorm/feature_{}'
-              .format(i, feature_index))
-        class_predictions_net = tf.nn.relu6(class_predictions_net)
+      if self._share_prediction_tower:
+        class_predictions_net = box_encodings_net
+      else:
+        for i in range(self._num_layers_before_predictor):
+          class_predictions_net = slim.conv2d(
+              class_predictions_net,
+              self._depth,
+              [self._kernel_size, self._kernel_size],
+              stride=1,
+              padding='SAME',
+              activation_fn=None,
+              normalizer_fn=(tf.identity if apply_batch_norm else None),
+              scope='ClassPredictionTower/conv2d_{}'.format(i))
+          if apply_batch_norm:
+            class_predictions_net = slim.batch_norm(
+                class_predictions_net,
+                scope='ClassPredictionTower/conv2d_{}/BatchNorm/feature_{}'
+                .format(i, feature_index))
+          class_predictions_net = tf.nn.relu6(class_predictions_net)
      if self._use_dropout:
        class_predictions_net = slim.dropout(
            class_predictions_net, keep_prob=self._dropout_keep_prob)
......
@@ -720,6 +720,60 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
             'ClassPredictor/biases')])
    self.assertEqual(expected_variable_set, actual_variable_set)
def test_predictions_share_weights_share_tower_not_batchnorm(
self):
num_classes_without_background = 6
def graph_fn(image_features1, image_features2):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=2,
box_code_size=4,
share_prediction_tower=True)
box_predictions = conv_box_predictor.predict(
[image_features1, image_features2],
num_predictions_per_location=[5, 5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, class_predictions_with_background)
with self.test_session(graph=tf.Graph()):
graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
expected_variable_set = set([
# Shared prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'PredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'PredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'PredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'PredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'PredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'PredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
# Box prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/biases'),
# Class prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/biases')])
self.assertEqual(expected_variable_set, actual_variable_set)
  def test_get_predictions_with_feature_maps_of_dynamic_shape(
      self):
    image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
......
@@ -224,7 +224,7 @@ class SigmoidFocalClassificationLossTest(tf.test.TestCase):
  def testEasyExamplesProduceSmallLossComparedToSigmoidXEntropy(self):
    prediction_tensor = tf.constant([[[_logit(0.97)],
-                                      [_logit(0.90)],
+                                      [_logit(0.91)],
                                      [_logit(0.73)],
                                      [_logit(0.27)],
                                      [_logit(0.09)],
......
@@ -15,6 +15,8 @@
r"""Convert raw COCO dataset to TFRecord for object_detection.

+Please note that this tool creates sharded output files.
+
Example usage:
    python create_coco_tf_record.py --logtostderr \
      --train_image_dir="${TRAIN_IMAGE_DIR}" \
@@ -33,12 +35,14 @@ import hashlib
import io
import json
import os
+import contextlib2
import numpy as np
import PIL.Image

from pycocotools import mask
import tensorflow as tf

+from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
@@ -188,7 +192,7 @@ def create_tf_example(image,

def _create_tf_record_from_coco_annotations(
-    annotations_file, image_dir, output_path, include_masks):
+    annotations_file, image_dir, output_path, include_masks, num_shards):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
@@ -197,8 +201,12 @@ def _create_tf_record_from_coco_annotations(
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentation masks
      (PNG encoded) in the result. default: False.
+    num_shards: number of output file shards.
  """
-  with tf.gfile.GFile(annotations_file, 'r') as fid:
+  with contextlib2.ExitStack() as tf_record_close_stack, \
+      tf.gfile.GFile(annotations_file, 'r') as fid:
+    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
+        tf_record_close_stack, output_path, num_shards)
    groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    category_index = label_map_util.create_category_index(
@@ -222,8 +230,6 @@ def _create_tf_record_from_coco_annotations(
      tf.logging.info('%d images are missing annotations.',
                      missing_annotation_count)

-    tf.logging.info('writing to output path: %s', output_path)
-    writer = tf.python_io.TFRecordWriter(output_path)
    total_num_annotations_skipped = 0
    for idx, image in enumerate(images):
      if idx % 100 == 0:
@@ -232,8 +238,8 @@ def _create_tf_record_from_coco_annotations(
      _, tf_example, num_annotations_skipped = create_tf_example(
          image, annotations_list, image_dir, category_index, include_masks)
      total_num_annotations_skipped += num_annotations_skipped
-      writer.write(tf_example.SerializeToString())
-  writer.close()
+      shard_idx = idx % num_shards
+      output_tfrecords[shard_idx].write(tf_example.SerializeToString())
  tf.logging.info('Finished writing, skipped %d annotations.',
                  total_num_annotations_skipped)
@@ -256,17 +262,20 @@ def main(_):
      FLAGS.train_annotations_file,
      FLAGS.train_image_dir,
      train_output_path,
-      FLAGS.include_masks)
+      FLAGS.include_masks,
+      num_shards=100)
  _create_tf_record_from_coco_annotations(
      FLAGS.val_annotations_file,
      FLAGS.val_image_dir,
      val_output_path,
-      FLAGS.include_masks)
+      FLAGS.include_masks,
+      num_shards=10)
  _create_tf_record_from_coco_annotations(
      FLAGS.testdev_annotations_file,
      FLAGS.test_image_dir,
      testdev_output_path,
-      FLAGS.include_masks)
+      FLAGS.include_masks,
+      num_shards=100)

if __name__ == '__main__':
......
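The helper open_sharded_output_tfrecords (from object_detection.dataset_tools.tf_record_creation_util) opens one TFRecordWriter per shard and registers each writer on the caller's ExitStack, so a single with-block flushes and closes them all. A minimal sketch of that behavior, assuming the standard '-00000-of-00100' shard suffix that the test below asserts:

import contextlib2
import tensorflow as tf

def open_sharded_output_tfrecords_sketch(exit_stack, base_path, num_shards):
  # Shard names follow the '<base_path>-ddddd-of-ddddd' convention checked
  # in test_create_sharded_tf_record below.
  shard_names = ['{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
                 for idx in range(num_shards)]
  # enter_context ties each writer's lifetime to the ExitStack, so every
  # shard writer is closed when the with-block exits.
  return [exit_stack.enter_context(tf.python_io.TFRecordWriter(name))
          for name in shard_names]

with contextlib2.ExitStack() as stack:
  writers = open_sharded_output_tfrecords_sketch(stack, '/tmp/out.record', 2)
  for idx, example_bytes in enumerate([b'a', b'b', b'c']):
    writers[idx % len(writers)].write(example_bytes)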
@@ -15,6 +15,7 @@
"""Test for create_coco_tf_record.py."""

import io
+import json
import os

import numpy as np
@@ -183,6 +184,62 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
        [0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1],
        [0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1]])
def test_create_sharded_tf_record(self):
tmp_dir = self.get_temp_dir()
image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
for image_path in image_paths:
image_data = np.random.rand(256, 256, 3)
save_path = os.path.join(tmp_dir, image_path)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
images = [{
'file_name': image_paths[0],
'height': 256,
'width': 256,
'id': 11,
}, {
'file_name': image_paths[1],
'height': 256,
'width': 256,
'id': 12,
}]
annotations = [{
'area': .5,
'iscrowd': False,
'image_id': 11,
'bbox': [64, 64, 128, 128],
'category_id': 2,
'id': 1000,
}]
category_index = [{
'name': 'dog',
'id': 1
}, {
'name': 'cat',
'id': 2
}, {
'name': 'human',
'id': 3
}]
groundtruth_data = {'images': images, 'annotations': annotations,
'categories': category_index}
annotation_file = os.path.join(tmp_dir, 'annotation.json')
with open(annotation_file, 'w') as annotation_fid:
json.dump(groundtruth_data, annotation_fid)
output_path = os.path.join(tmp_dir, 'out.record')
create_coco_tf_record._create_tf_record_from_coco_annotations(
annotation_file,
tmp_dir,
output_path,
False,
2)
self.assertTrue(os.path.exists(output_path + '-00000-of-00002'))
self.assertTrue(os.path.exists(output_path + '-00001-of-00002'))
if __name__ == '__main__':
  tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Exports an SSD detection model to use with tf-lite.
Outputs file:
* A tflite compatible frozen graph - $output_directory/tflite_graph.pb
The exported graph has the following input and output nodes.
Inputs:
'normalized_input_image_tensor': a float32 tensor of shape
[1, height, width, 3] containing the normalized input image. Note that the
height and width must be compatible with the height and width configured in
the fixed_shape_image resizer options in the pipeline config proto.
In the floating point Mobilenet model, 'normalized_image_tensor' has values
in the range [-1, 1). This typically means mapping each pixel (linearly) into
that range: input image values between 0 and 255 are scaled by 1/128.0, and
then -1 is added to them, so the result lies in [-1, 1).
In the quantized Mobilenet model, 'normalized_image_tensor' has values in the
range [0, 255].
In general, see the `preprocess` function defined in the feature extractor class
in the object_detection/models directory.
Outputs:
If add_postprocessing_op is true, the frozen graph adds a
TFLite_Detection_PostProcess custom op node, which has four outputs:
detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box
locations
detection_scores: a float32 tensor of shape [1, num_boxes]
with class scores
detection_classes: a float32 tensor of shape [1, num_boxes]
with class indices
num_boxes: a float32 tensor of size 1 containing the number of detected boxes
else:
the graph has two outputs:
'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
containing the encoded box predictions.
'raw_outputs/class_predictions': a float32 tensor of shape
[1, num_anchors, num_classes] containing the class scores for each anchor
after applying score conversion.
Example Usage:
--------------
python object_detection/export_tflite_ssd_graph \
--pipeline_config_path path/to/ssd_mobilenet.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
The expected output would be in the directory
path/to/exported_model_directory (which is created if it does not exist)
with contents:
- tflite_graph.pbtxt
- tflite_graph.pb
Config overrides (see the `config_override` flag) are text protobufs
(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
certain fields in the provided pipeline_config_path. These are useful for
making small changes to the inference graph that differ from the training or
eval config.
Example Usage (in which we change the NMS iou_threshold to be 0.5 and
NMS score_threshold to be 0.0):
python object_detection/export_tflite_ssd_graph \
--pipeline_config_path path/to/ssd_mobilenet.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
--config_override " \
model{ \
ssd{ \
post_processing { \
batch_non_max_suppression { \
score_threshold: 0.0 \
iou_threshold: 0.5 \
} \
} \
} \
} \
"
"""
import tensorflow as tf
from google.protobuf import text_format
from object_detection import export_tflite_ssd_graph_lib
from object_detection.protos import pipeline_pb2
flags = tf.app.flags
flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
flags.DEFINE_string(
'pipeline_config_path', None,
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file.')
flags.DEFINE_string('trained_checkpoint_prefix', None, 'Checkpoint prefix.')
flags.DEFINE_integer('max_detections', 10,
'Maximum number of detections (boxes) to show.')
flags.DEFINE_integer('max_classes_per_detection', 1,
'Number of classes to display per detection box.')
flags.DEFINE_bool('add_postprocessing_op', True,
'Add TFLite custom op for postprocessing to the graph.')
flags.DEFINE_string(
'config_override', '', 'pipeline_pb2.TrainEvalPipelineConfig '
'text proto to override pipeline_config_path.')
FLAGS = flags.FLAGS
def main(argv):
del argv # Unused.
flags.mark_flag_as_required('output_directory')
flags.mark_flag_as_required('pipeline_config_path')
flags.mark_flag_as_required('trained_checkpoint_prefix')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
text_format.Merge(FLAGS.config_override, pipeline_config)
export_tflite_ssd_graph_lib.export_tflite_graph(
pipeline_config, FLAGS.trained_checkpoint_prefix, FLAGS.output_directory,
FLAGS.add_postprocessing_op, FLAGS.max_detections,
FLAGS.max_classes_per_detection)
if __name__ == '__main__':
tf.app.run(main)
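To make the float-model input contract above concrete, a small sketch of normalizing a uint8 image before feeding it to 'normalized_input_image_tensor' (graph loading omitted; the 300x300 shape is illustrative and must match the pipeline's fixed_shape_resizer height/width):

import numpy as np

raw_image = np.random.randint(0, 256, size=(1, 300, 300, 3), dtype=np.uint8)
# Scale by 1/128.0 and subtract 1, mapping [0, 255] into [-1, 1).
normalized = raw_image.astype(np.float32) * (1.0 / 128.0) - 1.0
assert normalized.min() >= -1.0 and normalized.max() < 1.0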
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Exports an SSD detection model to use with tf-lite.
See export_tflite_ssd_graph.py for usage.
"""
import os
import tempfile
import numpy as np
import tensorflow as tf
from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.protobuf import saver_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from object_detection import exporter
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.builders import post_processing_builder
from object_detection.core import box_list
_DEFAULT_NUM_CHANNELS = 3
_DEFAULT_NUM_COORD_BOX = 4
def get_const_center_size_encoded_anchors(anchors):
"""Exports center-size encoded anchors as a constant tensor.
Args:
anchors: a float32 tensor of shape [num_anchors, 4] containing the anchor
boxes
Returns:
encoded_anchors: a float32 constant tensor of shape [num_anchors, 4]
containing the anchor boxes.
"""
anchor_boxlist = box_list.BoxList(anchors)
y, x, h, w = anchor_boxlist.get_center_coordinates_and_sizes()
num_anchors = y.get_shape().as_list()
with tf.Session() as sess:
y_out, x_out, h_out, w_out = sess.run([y, x, h, w])
encoded_anchors = tf.constant(
np.transpose(np.stack((y_out, x_out, h_out, w_out))),
dtype=tf.float32,
shape=[num_anchors[0], _DEFAULT_NUM_COORD_BOX],
name='anchors')
return encoded_anchors
def append_postprocessing_op(frozen_graph_def, max_detections,
max_classes_per_detection, nms_score_threshold,
nms_iou_threshold, num_classes, scale_values):
"""Appends postprocessing custom op.
Args:
frozen_graph_def: Frozen GraphDef for SSD model after freezing the
checkpoint
max_detections: Maximum number of detections (boxes) to show
max_classes_per_detection: Number of classes to display per detection
nms_score_threshold: Score threshold used in Non-maximal suppression in
post-processing
nms_iou_threshold: Intersection-over-union threshold used in Non-maximal
suppression in post-processing
num_classes: number of classes in SSD detector
scale_values: scale values is a dict with following key-value pairs
{y_scale: 10, x_scale: 10, h_scale: 5, w_scale: 5} that are used in decode
centersize boxes
Returns:
transformed_graph_def: Frozen GraphDef with postprocessing custom op
appended. The TFLite_Detection_PostProcess custom op node has four
outputs:
detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box
locations
detection_scores: a float32 tensor of shape [1, num_boxes]
with class scores
detection_classes: a float32 tensor of shape [1, num_boxes]
with class indices
num_boxes: a float32 tensor of size 1 containing the number of detected
boxes
"""
new_output = frozen_graph_def.node.add()
new_output.op = 'TFLite_Detection_PostProcess'
new_output.name = 'TFLite_Detection_PostProcess'
new_output.attr['_output_quantized'].CopyFrom(
attr_value_pb2.AttrValue(b=True))
new_output.attr['max_detections'].CopyFrom(
attr_value_pb2.AttrValue(i=max_detections))
new_output.attr['max_classes_per_detection'].CopyFrom(
attr_value_pb2.AttrValue(i=max_classes_per_detection))
new_output.attr['nms_score_threshold'].CopyFrom(
attr_value_pb2.AttrValue(f=nms_score_threshold.pop()))
new_output.attr['nms_iou_threshold'].CopyFrom(
attr_value_pb2.AttrValue(f=nms_iou_threshold.pop()))
new_output.attr['num_classes'].CopyFrom(
attr_value_pb2.AttrValue(i=num_classes))
new_output.attr['y_scale'].CopyFrom(
attr_value_pb2.AttrValue(f=scale_values['y_scale'].pop()))
new_output.attr['x_scale'].CopyFrom(
attr_value_pb2.AttrValue(f=scale_values['x_scale'].pop()))
new_output.attr['h_scale'].CopyFrom(
attr_value_pb2.AttrValue(f=scale_values['h_scale'].pop()))
new_output.attr['w_scale'].CopyFrom(
attr_value_pb2.AttrValue(f=scale_values['w_scale'].pop()))
new_output.input.extend(
['raw_outputs/box_encodings', 'raw_outputs/class_predictions', 'anchors'])
# Transform the graph to append new postprocessing op
input_names = []
output_names = ['TFLite_Detection_PostProcess']
transforms = ['strip_unused_nodes']
transformed_graph_def = TransformGraph(frozen_graph_def, input_names,
output_names, transforms)
return transformed_graph_def
def export_tflite_graph(pipeline_config, trained_checkpoint_prefix, output_dir,
add_postprocessing_op, max_detections,
max_classes_per_detection):
"""Exports a tflite compatible graph and anchors for ssd detection model.
Anchors are written to a tensor and tflite compatible graph
is written to output_dir/tflite_graph.pb.
Args:
pipeline_config: a pipeline.proto object containing the configuration for
SSD model to export.
trained_checkpoint_prefix: a file prefix for the checkpoint containing the
trained parameters of the SSD model.
output_dir: A directory to write the tflite graph and anchor file to.
add_postprocessing_op: If true, appends a TFLite_Detection_PostProcess
custom op to the frozen graph.
max_detections: Maximum number of detections (boxes) to show
max_classes_per_detection: Number of classes to display per detection
Raises:
ValueError: if the pipeline config contains models other than ssd, or uses
an image resizer other than fixed_shape_resizer.
"""
tf.gfile.MakeDirs(output_dir)
if pipeline_config.model.WhichOneof('model') != 'ssd':
raise ValueError('Only ssd models are supported in tflite. '
'Found {} in config'.format(
pipeline_config.model.WhichOneof('model')))
num_classes = pipeline_config.model.ssd.num_classes
nms_score_threshold = {
pipeline_config.model.ssd.post_processing.batch_non_max_suppression.
score_threshold
}
nms_iou_threshold = {
pipeline_config.model.ssd.post_processing.batch_non_max_suppression.
iou_threshold
}
scale_values = {}
scale_values['y_scale'] = {
pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale
}
scale_values['x_scale'] = {
pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale
}
scale_values['h_scale'] = {
pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale
}
scale_values['w_scale'] = {
pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale
}
image_resizer_config = pipeline_config.model.ssd.image_resizer
image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
num_channels = _DEFAULT_NUM_CHANNELS
if image_resizer == 'fixed_shape_resizer':
height = image_resizer_config.fixed_shape_resizer.height
width = image_resizer_config.fixed_shape_resizer.width
if image_resizer_config.fixed_shape_resizer.convert_to_grayscale:
num_channels = 1
shape = [1, height, width, num_channels]
else:
raise ValueError(
'Only fixed_shape_resizer '
'is supported with tflite. Found {}'.format(
image_resizer_config.WhichOneof('image_resizer_oneof')))
image = tf.placeholder(
tf.float32, shape=shape, name='normalized_input_image_tensor')
detection_model = model_builder.build(
pipeline_config.model, is_training=False)
predicted_tensors = detection_model.predict(image, true_image_shapes=None)
# The score conversion occurs before the post-processing custom op
_, score_conversion_fn = post_processing_builder.build(
pipeline_config.model.ssd.post_processing)
class_predictions = score_conversion_fn(
predicted_tensors['class_predictions_with_background'])
with tf.name_scope('raw_outputs'):
# 'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
# containing the encoded box predictions. Note that these are raw
# predictions and no Non-Max suppression is applied on them and
# no decode center size boxes is applied to them.
tf.identity(predicted_tensors['box_encodings'], name='box_encodings')
# 'raw_outputs/class_predictions': a float32 tensor of shape
# [1, num_anchors, num_classes] containing the class scores for each anchor
# after applying score conversion.
tf.identity(class_predictions, name='class_predictions')
# 'anchors': a float32 tensor of shape
# [num_anchors, 4] containing the anchors as a constant node.
tf.identity(
get_const_center_size_encoded_anchors(predicted_tensors['anchors']),
name='anchors')
# Add global step to the graph, so we know the training step number when we
# evaluate the model.
tf.train.get_or_create_global_step()
# graph rewriter
if pipeline_config.HasField('graph_rewriter'):
graph_rewriter_config = pipeline_config.graph_rewriter
graph_rewriter_fn = graph_rewriter_builder.build(
graph_rewriter_config, is_training=False)
graph_rewriter_fn()
# freeze the graph
saver_kwargs = {}
if pipeline_config.eval_config.use_moving_averages:
saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
moving_average_checkpoint = tempfile.NamedTemporaryFile()
exporter.replace_variable_values_with_moving_averages(
tf.get_default_graph(), trained_checkpoint_prefix,
moving_average_checkpoint.name)
checkpoint_to_use = moving_average_checkpoint.name
else:
checkpoint_to_use = trained_checkpoint_prefix
saver = tf.train.Saver(**saver_kwargs)
input_saver_def = saver.as_saver_def()
frozen_graph_def = exporter.freeze_graph_with_def_protos(
input_graph_def=tf.get_default_graph().as_graph_def(),
input_saver_def=input_saver_def,
input_checkpoint=checkpoint_to_use,
output_node_names=','.join([
'raw_outputs/box_encodings', 'raw_outputs/class_predictions',
'anchors'
]),
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
clear_devices=True,
initializer_nodes='')
# Add new operation to do post processing in a custom op (TF Lite only)
if add_postprocessing_op:
transformed_graph_def = append_postprocessing_op(
frozen_graph_def, max_detections, max_classes_per_detection,
nms_score_threshold, nms_iou_threshold, num_classes, scale_values)
else:
# Return frozen without adding post-processing custom op
transformed_graph_def = frozen_graph_def
binary_graph = os.path.join(output_dir, 'tflite_graph.pb')
with tf.gfile.GFile(binary_graph, 'wb') as f:
f.write(transformed_graph_def.SerializeToString())
txt_graph = os.path.join(output_dir, 'tflite_graph.pbtxt')
with tf.gfile.GFile(txt_graph, 'w') as f:
f.write(str(transformed_graph_def))
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.export_tflite_ssd_graph."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import six
import tensorflow as tf
from object_detection import export_tflite_ssd_graph_lib
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.protos import graph_rewriter_pb2
from object_detection.protos import pipeline_pb2
from object_detection.protos import post_processing_pb2
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
from unittest import mock # pylint: disable=g-import-not-at-top
class FakeModel(model.DetectionModel):
def __init__(self, add_detection_masks=False):
self._add_detection_masks = add_detection_masks
def preprocess(self, inputs):
pass
def predict(self, preprocessed_inputs, true_image_shapes):
features = tf.contrib.slim.conv2d(preprocessed_inputs, 3, 1)
with tf.control_dependencies([features]):
prediction_tensors = {
'box_encodings':
tf.constant([[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]],
tf.float32),
'class_predictions_with_background':
tf.constant([[[0.7, 0.6], [0.9, 0.0]]], tf.float32),
}
with tf.control_dependencies(
[tf.convert_to_tensor(features.get_shape().as_list()[1:3])]):
prediction_tensors['anchors'] = tf.constant(
[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 1.0]], tf.float32)
return prediction_tensors
def postprocess(self, prediction_tensors, true_image_shapes):
pass
def restore_map(self, checkpoint_path, from_detection_checkpoint):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
class ExportTfliteGraphTest(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self,
checkpoint_path,
use_moving_averages,
quantize=False,
num_channels=3):
g = tf.Graph()
with g.as_default():
mock_model = FakeModel()
inputs = tf.placeholder(tf.float32, shape=[1, 10, 10, num_channels])
mock_model.predict(inputs, true_image_shapes=None)
if use_moving_averages:
tf.train.ExponentialMovingAverage(0.0).apply()
tf.train.get_or_create_global_step()
if quantize:
graph_rewriter_config = graph_rewriter_pb2.GraphRewriter()
graph_rewriter_config.quantization.delay = 500000
graph_rewriter_fn = graph_rewriter_builder.build(
graph_rewriter_config, is_training=False)
graph_rewriter_fn()
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init)
saver.save(sess, checkpoint_path)
def _assert_quant_vars_exists(self, tflite_graph_file):
with tf.gfile.Open(tflite_graph_file) as f:
graph_string = f.read()
print(graph_string)
self.assertTrue('quant' in graph_string)
def _import_graph_and_run_inference(self, tflite_graph_file, num_channels=3):
"""Imports a tflite graph, runs single inference and returns outputs."""
graph = tf.Graph()
with graph.as_default():
graph_def = tf.GraphDef()
with tf.gfile.Open(tflite_graph_file) as f:
graph_def.ParseFromString(f.read())
tf.import_graph_def(graph_def, name='')
input_tensor = graph.get_tensor_by_name('normalized_input_image_tensor:0')
box_encodings = graph.get_tensor_by_name('raw_outputs/box_encodings:0')
class_predictions = graph.get_tensor_by_name(
'raw_outputs/class_predictions:0')
with self.test_session(graph) as sess:
[box_encodings_np, class_predictions_np] = sess.run(
[box_encodings, class_predictions],
feed_dict={input_tensor: np.random.rand(1, 10, 10, num_channels)})
return box_encodings_np, class_predictions_np
def _export_graph(self, pipeline_config, num_channels=3):
"""Exports a tflite graph and an anchor file."""
output_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(output_dir, 'model.ckpt')
tflite_graph_file = os.path.join(output_dir, 'tflite_graph.pb')
quantize = pipeline_config.HasField('graph_rewriter')
self._save_checkpoint_from_mock_model(
trained_checkpoint_prefix,
use_moving_averages=pipeline_config.eval_config.use_moving_averages,
quantize=quantize,
num_channels=num_channels)
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
with tf.Graph().as_default():
export_tflite_ssd_graph_lib.export_tflite_graph(
pipeline_config=pipeline_config,
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_dir=output_dir,
add_postprocessing_op=False,
max_detections=10,
max_classes_per_detection=1)
return tflite_graph_file
  def test_export_tflite_graph_with_moving_averages(self):
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    pipeline_config.eval_config.use_moving_averages = True
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10
    pipeline_config.model.ssd.num_classes = 2
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0
    tflite_graph_file = self._export_graph(pipeline_config)
    self.assertTrue(os.path.exists(tflite_graph_file))
    (box_encodings_np, class_predictions_np
    ) = self._import_graph_and_run_inference(tflite_graph_file)
    self.assertAllClose(box_encodings_np,
                        [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]])
    self.assertAllClose(class_predictions_np, [[[0.7, 0.6], [0.9, 0.0]]])

  def test_export_tflite_graph_without_moving_averages(self):
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    pipeline_config.eval_config.use_moving_averages = False
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10
    pipeline_config.model.ssd.num_classes = 2
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0
    tflite_graph_file = self._export_graph(pipeline_config)
    self.assertTrue(os.path.exists(tflite_graph_file))
    (box_encodings_np, class_predictions_np
    ) = self._import_graph_and_run_inference(tflite_graph_file)
    self.assertAllClose(box_encodings_np,
                        [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]])
    self.assertAllClose(class_predictions_np, [[[0.7, 0.6], [0.9, 0.0]]])

  def test_export_tflite_graph_grayscale(self):
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    pipeline_config.eval_config.use_moving_averages = False
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10
    (pipeline_config.model.ssd.image_resizer.fixed_shape_resizer
    ).convert_to_grayscale = True
    pipeline_config.model.ssd.num_classes = 2
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0
    tflite_graph_file = self._export_graph(pipeline_config, num_channels=1)
    self.assertTrue(os.path.exists(tflite_graph_file))
    (box_encodings_np,
     class_predictions_np) = self._import_graph_and_run_inference(
         tflite_graph_file, num_channels=1)
    self.assertAllClose(box_encodings_np,
                        [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]])
    self.assertAllClose(class_predictions_np, [[[0.7, 0.6], [0.9, 0.0]]])

  def test_export_tflite_graph_with_quantization(self):
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    pipeline_config.eval_config.use_moving_averages = False
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10
    pipeline_config.graph_rewriter.quantization.delay = 500000
    pipeline_config.model.ssd.num_classes = 2
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0
    tflite_graph_file = self._export_graph(pipeline_config)
    self.assertTrue(os.path.exists(tflite_graph_file))
    self._assert_quant_vars_exists(tflite_graph_file)
    (box_encodings_np, class_predictions_np
    ) = self._import_graph_and_run_inference(tflite_graph_file)
    self.assertAllClose(box_encodings_np,
                        [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]])
    self.assertAllClose(class_predictions_np, [[[0.7, 0.6], [0.9, 0.0]]])

  def test_export_tflite_graph_with_softmax_score_conversion(self):
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    pipeline_config.eval_config.use_moving_averages = False
    pipeline_config.model.ssd.post_processing.score_converter = (
        post_processing_pb2.PostProcessing.SOFTMAX)
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10
    pipeline_config.model.ssd.num_classes = 2
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0
    tflite_graph_file = self._export_graph(pipeline_config)
    self.assertTrue(os.path.exists(tflite_graph_file))
    (box_encodings_np, class_predictions_np
    ) = self._import_graph_and_run_inference(tflite_graph_file)
    self.assertAllClose(box_encodings_np,
                        [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]])
    self.assertAllClose(class_predictions_np,
                        [[[0.524979, 0.475021], [0.710949, 0.28905]]])

  def test_export_tflite_graph_with_sigmoid_score_conversion(self):
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    pipeline_config.eval_config.use_moving_averages = False
    pipeline_config.model.ssd.post_processing.score_converter = (
        post_processing_pb2.PostProcessing.SIGMOID)
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10
    pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10
    pipeline_config.model.ssd.num_classes = 2
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0
    pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0
    tflite_graph_file = self._export_graph(pipeline_config)
    self.assertTrue(os.path.exists(tflite_graph_file))
    (box_encodings_np, class_predictions_np
    ) = self._import_graph_and_run_inference(tflite_graph_file)
    self.assertAllClose(box_encodings_np,
                        [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]])
    self.assertAllClose(class_predictions_np,
                        [[[0.668188, 0.645656], [0.710949, 0.5]]])


if __name__ == '__main__':
  tf.test.main()
...@@ -42,6 +42,27 @@ union based on the object masks instead of object boxes.
Similar to the weighted pascal voc 2010 detection metric, but computes the
intersection over union based on the object masks instead of object boxes.
## COCO detection metrics
`EvalConfig.metrics_set='coco_detection_metrics'`
The COCO metrics are the official detection metrics used to score the
[COCO competition](http://cocodataset.org/) and are similar to the Pascal VOC
metrics, but have a slightly different implementation and report additional
statistics, such as mAP averaged over IOU thresholds of .5:.95 and
precision/recall statistics for small, medium, and large objects.
See the
[pycocotools](https://github.com/cocodataset/cocoapi/tree/master/PythonAPI)
repository for more details.
## COCO mask metrics
`EvalConfig.metrics_set='coco_mask_metrics'`
Similar to the COCO detection metrics, but computes the
intersection over union based on the object masks instead of object boxes.
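As a quick illustration, a minimal sketch of selecting these metrics in the
`eval_config` section of a pipeline config might look like the following (the
`num_examples` value is only a placeholder for the size of your evaluation
set):

```
eval_config {
  metrics_set: "coco_detection_metrics"
  num_examples: 8000
}
```

To score masks instead, list `coco_mask_metrics` in the same field, provided
the model actually predicts instance masks.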
## Open Images V2 detection metric

`EvalConfig.metrics_set='open_images_V2_detection_metrics'`
...
...@@ -50,8 +50,10 @@ python object_detection/dataset_tools/create_pet_tf_record.py \
    --output_dir=`pwd`
```
You should end up with two 10-sharded TFRecord files named
`pet_faces_train.record-?????-of-00010` and
`pet_faces_val.record-?????-of-00010` in the `tensorflow/models/research/`
directory.
The label map for the Pet dataset can be found at
`object_detection/data/pet_label_map.pbtxt`.
# Preparing Inputs
[TOC]
To use your own dataset in the Tensorflow Object Detection API, you must
convert it into the
[TFRecord file format](https://www.tensorflow.org/api_guides/python/python_io#tfrecords_format_details).
This document outlines how to write a script to generate the TFRecord file.
...@@ -86,7 +88,7 @@ def create_cat_tf_example(encoded_cat_image_data):
  return tf_example
```
## Conversion Script Outline {#conversion-script-outline}
A typical conversion script will look like the following:
...@@ -159,3 +161,49 @@ currently unused by the API and are optional.
Note: Please refer to the section on [Running an Instance Segmentation
Model](instance_segmentation.md) for instructions on how to configure a model
that predicts masks in addition to object bounding boxes.
## Sharding datasets
When you have more than a few thousand examples, it is beneficial to shard your
dataset into multiple files:

* The tf.data.Dataset API can read input examples in parallel, improving
  throughput (see the sketch after this list).
* With sharded files, the tf.data.Dataset API can shuffle the examples more
  thoroughly, which slightly improves the performance of the model.
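As a rough sketch of the first point (assuming TF 1.x and the sharded filename
pattern produced by the snippet below; the `cycle_length` and buffer sizes are
illustrative, not values required by the API):

```python
import tensorflow as tf

# Match every shard of the training set; shuffling the filenames gives a
# different shard order on each epoch.
filenames = tf.data.Dataset.list_files(
    '/path/to/train_dataset.record-?????-of-00010', shuffle=True)
# Interleave records from several shard files that are read concurrently.
dataset = filenames.interleave(
    tf.data.TFRecordDataset, cycle_length=4, block_length=16)
# Record-level shuffle on top of the file-level shuffle above.
dataset = dataset.shuffle(buffer_size=2048)
```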
Instead of writing all tf.Example protos to a single file as shown in
[conversion script outline](#conversion-script-outline), use the snippet below.
```python
import contextlib2
from object_detection.dataset_tools import tf_record_creation_util

num_shards = 10
output_filebase = '/path/to/train_dataset.record'

with contextlib2.ExitStack() as tf_record_close_stack:
  output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
      tf_record_close_stack, output_filebase, num_shards)
  # `examples` is your list of input examples and `create_tf_example` is the
  # conversion function from the outline above.
  for index, example in enumerate(examples):
    tf_example = create_tf_example(example)
    output_shard_index = index % num_shards
    output_tfrecords[output_shard_index].write(tf_example.SerializeToString())
```
This will produce the following output files:

```bash
/path/to/train_dataset.record-00000-of-00010
/path/to/train_dataset.record-00001-of-00010
...
/path/to/train_dataset.record-00009-of-00010
```
These files can then be used in the config file as shown below.
```
tf_record_input_reader {
input_path: "/path/to/train_dataset.record-?????-of-00010"
}
```
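As a sanity check, one possible way (assuming TF 1.x; the glob pattern below
matches the files above) to confirm that all shards were written and to count
the total number of records:

```python
import glob

import tensorflow as tf

shards = sorted(glob.glob('/path/to/train_dataset.record-*-of-00010'))
assert len(shards) == 10, 'expected 10 shards, found %d' % len(shards)
# tf_record_iterator yields every serialized tf.Example in a shard.
total = sum(
    sum(1 for _ in tf.python_io.tf_record_iterator(shard)) for shard in shards)
print('%d records across %d shards' % (total, len(shards)))
```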
...@@ -132,6 +132,7 @@ def transform_input_data(tensor_dict,
    merged_boxes, merged_classes, _ = util_ops.merge_boxes_with_multiple_labels(
        tensor_dict[fields.InputDataFields.groundtruth_boxes],
        zero_indexed_groundtruth_classes, num_classes)
    merged_classes = tf.cast(merged_classes, tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
...
...@@ -47,10 +47,10 @@ import functools
import os
import tensorflow as tf

from object_detection import evaluator
from object_detection.builders import dataset_builder
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.legacy import evaluator
from object_detection.utils import config_util
from object_detection.utils import label_map_util
...@@ -80,6 +80,7 @@ flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
FLAGS = flags.FLAGS

@tf.contrib.framework.deprecated(None, 'Use object_detection/model_main.py.')
def main(unused_argv):
  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'
...