Unverified Commit fd7b6887 authored by Jonathan Huang, committed by GitHub

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection 
parents f98ec55e 1efe98bb
......@@ -24,7 +24,7 @@ import tensorflow as tf
from object_detection.dataset_tools import create_pascal_tf_record
class DictToTFExampleTest(tf.test.TestCase):
class CreatePascalTFRecordTest(tf.test.TestCase):
def _assertProtoEqual(self, proto_field, expectation):
"""Helper function to assert if a proto field equals some value.
......
......@@ -50,6 +50,8 @@ flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes '
'for pet faces. Otherwise generates bounding boxes (as '
'well as segmentations for full pet bodies). Note that '
'in the latter case, the resulting files are much larger.')
flags.DEFINE_string('mask_type', 'png', 'How to represent instance '
'segmentation masks. Options are "png" or "numerical".')
FLAGS = flags.FLAGS
......@@ -72,7 +74,8 @@ def dict_to_tf_example(data,
label_map_dict,
image_subdirectory,
ignore_difficult_instances=False,
faces_only=True):
faces_only=True,
mask_type='png'):
"""Convert XML derived dict to tf.Example proto.
Notice that this function normalizes the bounding box coordinates provided
......@@ -89,6 +92,8 @@ def dict_to_tf_example(data,
dataset (default: False).
faces_only: If True, generates bounding boxes for pet faces. Otherwise
generates bounding boxes (as well as segmentations for full pet bodies).
mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
smaller file sizes.
Returns:
example: The converted tf.Example.
......@@ -158,7 +163,7 @@ def dict_to_tf_example(data,
truncated.append(int(obj['truncated']))
poses.append(obj['pose'].encode('utf8'))
if not faces_only:
mask_remapped = mask_np != 2
mask_remapped = (mask_np != 2).astype(np.uint8)
masks.append(mask_remapped)
feature_dict = {
......@@ -182,10 +187,20 @@ def dict_to_tf_example(data,
'image/object/view': dataset_util.bytes_list_feature(poses),
}
if not faces_only:
if mask_type == 'numerical':
mask_stack = np.stack(masks).astype(np.float32)
masks_flattened = np.reshape(mask_stack, [-1])
feature_dict['image/object/mask'] = (
dataset_util.float_list_feature(masks_flattened.tolist()))
elif mask_type == 'png':
encoded_mask_png_list = []
for mask in masks:
img = PIL.Image.fromarray(mask)
output = io.BytesIO()
img.save(output, format='PNG')
encoded_mask_png_list.append(output.getvalue())
feature_dict['image/object/mask'] = (
dataset_util.bytes_list_feature(encoded_mask_png_list))
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return example
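As a side note on what the new `mask_type` option produces, below is a small sketch (not part of the diff) of reading the `image/object/mask` feature back out of a generated record. The feature key and both encodings mirror the code above; the record path and the `height`/`width` values are placeholders.

```python
# Sketch: decode 'image/object/mask' from a tf.Example written by the code
# above. 'example.record', height and width are placeholder values.
import io

import numpy as np
import PIL.Image
import tensorflow as tf


def decode_masks(tf_example, mask_type, height, width):
  feature = tf_example.features.feature['image/object/mask']
  if mask_type == 'png':
    # One single-channel PNG string per instance; pixel values are 0 or 1.
    return [np.array(PIL.Image.open(io.BytesIO(png)))
            for png in feature.bytes_list.value]
  elif mask_type == 'numerical':
    # A single float vector holding all instances, flattened row-major.
    flat = np.array(feature.float_list.value, dtype=np.float32)
    return list(flat.reshape([-1, height, width]))


for record in tf.python_io.tf_record_iterator('example.record'):
  example = tf.train.Example.FromString(record)
  masks = decode_masks(example, mask_type='png', height=300, width=300)
```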
......@@ -196,7 +211,8 @@ def create_tf_record(output_filename,
annotations_dir,
image_dir,
examples,
faces_only=True):
faces_only=True,
mask_type='png'):
"""Creates a TFRecord file from examples.
Args:
......@@ -207,6 +223,8 @@ def create_tf_record(output_filename,
examples: Examples to parse and save to tf record.
faces_only: If True, generates bounding boxes for pet faces. Otherwise
generates bounding boxes (as well as segmentations for full pet bodies).
mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
smaller file sizes.
"""
writer = tf.python_io.TFRecordWriter(output_filename)
for idx, example in enumerate(examples):
......@@ -225,7 +243,12 @@ def create_tf_record(output_filename,
try:
tf_example = dict_to_tf_example(
data, mask_path, label_map_dict, image_dir, faces_only=faces_only)
data,
mask_path,
label_map_dict,
image_dir,
faces_only=faces_only,
mask_type=mask_type)
writer.write(tf_example.SerializeToString())
except ValueError:
logging.warning('Invalid example: %s, ignoring.', xml_path)
......@@ -233,7 +256,7 @@ def create_tf_record(output_filename,
writer.close()
# TODO(derekjchow): Add test for pet/PASCAL main files.
# TODO: Add test for pet/PASCAL main files.
def main(_):
data_dir = FLAGS.data_dir
label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
......@@ -262,10 +285,22 @@ def main(_):
'pet_train_with_masks.record')
val_output_path = os.path.join(FLAGS.output_dir,
'pet_val_with_masks.record')
create_tf_record(train_output_path, label_map_dict, annotations_dir,
image_dir, train_examples, faces_only=FLAGS.faces_only)
create_tf_record(val_output_path, label_map_dict, annotations_dir,
image_dir, val_examples, faces_only=FLAGS.faces_only)
create_tf_record(
train_output_path,
label_map_dict,
annotations_dir,
image_dir,
train_examples,
faces_only=FLAGS.faces_only,
mask_type=FLAGS.mask_type)
create_tf_record(
val_output_path,
label_map_dict,
annotations_dir,
image_dir,
val_examples,
faces_only=FLAGS.faces_only,
mask_type=FLAGS.mask_type)
if __name__ == '__main__':
......
#!/bin/bash
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Script to download and preprocess the MSCOCO data set for detection.
#
# The outputs of this script are TFRecord files containing serialized
# tf.Example protocol buffers. See create_coco_tf_record.py for details of how
# the tf.Example protocol buffers are constructed and see
# http://cocodataset.org/#overview for an overview of the dataset.
#
# usage:
# bash object_detection/dataset_tools/download_and_preprocess_mscoco.sh \
# /tmp/mscoco
set -e
if [ -z "$1" ]; then
echo "usage download_and_preprocess_mscoco.sh [data dir]"
exit
fi
if [ "$(uname)" == "Darwin" ]; then
UNZIP="tar -xf"
else
UNZIP="unzip -nq"
fi
# Create the output directories.
OUTPUT_DIR="${1%/}"
SCRATCH_DIR="${OUTPUT_DIR}/raw-data"
mkdir -p "${OUTPUT_DIR}"
mkdir -p "${SCRATCH_DIR}"
CURRENT_DIR=$(pwd)
# Helper function to download and unpack a .zip file.
function download_and_unzip() {
local BASE_URL=${1}
local FILENAME=${2}
if [ ! -f ${FILENAME} ]; then
echo "Downloading ${FILENAME} to $(pwd)"
wget -nd -c "${BASE_URL}/${FILENAME}"
else
echo "Skipping download of ${FILENAME}"
fi
echo "Unzipping ${FILENAME}"
${UNZIP} ${FILENAME}
}
cd ${SCRATCH_DIR}
# Download the images.
BASE_IMAGE_URL="http://images.cocodataset.org/zips"
TRAIN_IMAGE_FILE="train2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017"
VAL_IMAGE_FILE="val2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE}
VAL_IMAGE_DIR="${SCRATCH_DIR}/val2017"
TEST_IMAGE_FILE="test2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TEST_IMAGE_FILE}
TEST_IMAGE_DIR="${SCRATCH_DIR}/test2017"
# Download the annotations.
BASE_INSTANCES_URL="http://images.cocodataset.org/annotations"
INSTANCES_FILE="annotations_trainval2017.zip"
download_and_unzip ${BASE_INSTANCES_URL} ${INSTANCES_FILE}
TRAIN_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_train2017.json"
VAL_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_val2017.json"
# Download the test image info.
BASE_IMAGE_INFO_URL="http://images.cocodataset.org/annotations"
IMAGE_INFO_FILE="image_info_test2017.zip"
download_and_unzip ${BASE_IMAGE_INFO_URL} ${IMAGE_INFO_FILE}
TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json"
# Build TFRecords of the image data.
cd "${CURRENT_DIR}"
python object_detection/dataset_tools/create_coco_tf_record.py \
--logtostderr \
--include_masks \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}"
......@@ -18,7 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import xrange
import tensorflow as tf
from object_detection.core import standard_fields
......@@ -103,7 +102,7 @@ def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
"""
tf_record_output_filenames = [
'{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
for idx in xrange(num_shards)
for idx in range(num_shards)
]
tfrecords = [
......
......@@ -48,9 +48,10 @@ import os
import tensorflow as tf
from object_detection import evaluator
from object_detection.builders import input_reader_builder
from object_detection.builders import dataset_builder
from object_detection.builders import model_builder
from object_detection.utils import config_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
......@@ -103,19 +104,20 @@ def main(unused_argv):
model_config = configs['model']
eval_config = configs['eval_config']
input_config = configs['eval_input_config']
if FLAGS.eval_training_data:
input_config = configs['train_input_config']
else:
input_config = configs['eval_input_config']
model_fn = functools.partial(
model_builder.build,
model_config=model_config,
is_training=False)
create_input_dict_fn = functools.partial(
input_reader_builder.build,
input_config)
def get_next(config):
return dataset_util.make_initializable_iterator(
dataset_builder.build(config)).get_next()
create_input_dict_fn = functools.partial(get_next, input_config)
label_map = label_map_util.load_labelmap(input_config.label_map_path)
max_num_classes = max([item.id for item in label_map.item])
......
......@@ -40,14 +40,13 @@ def write_metrics(metrics, global_step, summary_dir):
summary_dir: Directory to write tensorflow summaries to.
"""
logging.info('Writing metrics to tf summary.')
summary_writer = tf.summary.FileWriter(summary_dir)
summary_writer = tf.summary.FileWriterCache.get(summary_dir)
for key in sorted(metrics):
summary = tf.Summary(value=[
tf.Summary.Value(tag=key, simple_value=metrics[key]),
])
summary_writer.add_summary(summary, global_step)
logging.info('%s: %f', key, metrics[key])
summary_writer.close()
logging.info('Metrics written to tf summary.')
......@@ -60,8 +59,12 @@ def visualize_detection_results(result_dict,
export_dir='',
agnostic_mode=False,
show_groundtruth=False,
groundtruth_box_visualization_color='black',
min_score_thresh=.5,
max_num_predictions=20):
max_num_predictions=20,
skip_scores=False,
skip_labels=False,
keep_image_id_for_visualization_export=False):
"""Visualizes detection results and writes visualizations to image summaries.
This function visualizes an image with its detected bounding boxes and writes
......@@ -99,44 +102,57 @@ def visualize_detection_results(result_dict,
class-agnostic mode or not.
show_groundtruth: boolean (default: False) controlling whether to show
groundtruth boxes in addition to detected boxes
groundtruth_box_visualization_color: box color for visualizing groundtruth
boxes
min_score_thresh: minimum score threshold for a box to be visualized
max_num_predictions: maximum number of detections to visualize
skip_scores: whether to skip score when drawing a single detection
skip_labels: whether to skip label when drawing a single detection
keep_image_id_for_visualization_export: whether to keep image identifier in
filename when exported to export_dir
Raises:
ValueError: if result_dict does not contain the expected keys (i.e.,
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes')
"""
detection_fields = fields.DetectionResultFields
input_fields = fields.InputDataFields
if not set([
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes'
input_fields.original_image,
detection_fields.detection_boxes,
detection_fields.detection_scores,
detection_fields.detection_classes,
]).issubset(set(result_dict.keys())):
raise ValueError('result_dict does not contain all expected keys.')
if show_groundtruth and 'groundtruth_boxes' not in result_dict:
if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
raise ValueError('If show_groundtruth is enabled, result_dict must contain '
'groundtruth_boxes.')
logging.info('Creating detection visualizations.')
category_index = label_map_util.create_category_index(categories)
image = np.squeeze(result_dict['original_image'], axis=0)
detection_boxes = result_dict['detection_boxes']
detection_scores = result_dict['detection_scores']
detection_classes = np.int32((result_dict['detection_classes']))
detection_keypoints = result_dict.get('detection_keypoints', None)
detection_masks = result_dict.get('detection_masks', None)
image = np.squeeze(result_dict[input_fields.original_image], axis=0)
detection_boxes = result_dict[detection_fields.detection_boxes]
detection_scores = result_dict[detection_fields.detection_scores]
detection_classes = np.int32((result_dict[
detection_fields.detection_classes]))
detection_keypoints = result_dict.get(detection_fields.detection_keypoints)
detection_masks = result_dict.get(detection_fields.detection_masks)
detection_boundaries = result_dict.get(detection_fields.detection_boundaries)
# Plot groundtruth underneath detections
if show_groundtruth:
groundtruth_boxes = result_dict['groundtruth_boxes']
groundtruth_keypoints = result_dict.get('groundtruth_keypoints', None)
groundtruth_boxes = result_dict[input_fields.groundtruth_boxes]
groundtruth_keypoints = result_dict.get(input_fields.groundtruth_keypoints)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
groundtruth_boxes,
None,
None,
category_index,
image=image,
boxes=groundtruth_boxes,
classes=None,
scores=None,
category_index=category_index,
keypoints=groundtruth_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=None)
max_boxes_to_draw=None,
groundtruth_box_visualization_color=groundtruth_box_visualization_color)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
detection_boxes,
......@@ -144,13 +160,22 @@ def visualize_detection_results(result_dict,
detection_scores,
category_index,
instance_masks=detection_masks,
instance_boundaries=detection_boundaries,
keypoints=detection_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=max_num_predictions,
min_score_thresh=min_score_thresh,
agnostic_mode=agnostic_mode)
agnostic_mode=agnostic_mode,
skip_scores=skip_scores,
skip_labels=skip_labels)
if export_dir:
if keep_image_id_for_visualization_export and result_dict[fields.InputDataFields().key]:
export_path = os.path.join(export_dir, 'export-{}-{}.png'.format(
tag, result_dict[fields.InputDataFields().key]))
else:
export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
vis_utils.save_image_array_as_png(image, export_path)
......@@ -161,9 +186,8 @@ def visualize_detection_results(result_dict,
encoded_image_string=vis_utils.encode_image_array_as_png_str(
image)))
])
summary_writer = tf.summary.FileWriter(summary_dir)
summary_writer = tf.summary.FileWriterCache.get(summary_dir)
summary_writer.add_summary(summary, global_step)
summary_writer.close()
logging.info('Detection visualizations written to summary with tag %s.', tag)
......@@ -260,8 +284,10 @@ def _run_checkpoint_once(tensor_dict,
result_dict = {}
else:
result_dict = batch_processor(tensor_dict, sess, batch, counters)
if not result_dict:
continue
for evaluator in evaluators:
# TODO: Use image_id tensor once we fix the input data
# TODO(b/65130867): Use image_id tensor once we fix the input data
# decoders to return correct image_id.
# TODO: result_dict contains batches of images, while
# add_single_ground_truth_image_info expects a single image. Fix
......@@ -422,9 +448,9 @@ def result_dict_for_single_example(image,
(Optional).
class_agnostic: Boolean indicating whether the detections are class-agnostic
(i.e. binary). Default False.
scale_to_absolute: Boolean indicating whether boxes, masks, keypoints should
be scaled to absolute coordinates. Note that for IoU based evaluations,
it does not matter whether boxes are expressed in absolute or relative
scale_to_absolute: Boolean indicating whether boxes and keypoints should be
scaled to absolute coordinates. Note that for IoU based evaluations, it
does not matter whether boxes are expressed in absolute or relative
coordinates. Default False.
Returns:
......@@ -436,8 +462,8 @@ def result_dict_for_single_example(image,
`scale_to_absolute`.
'detection_scores': [max_detections] float32 tensor of scores.
'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
'detection_masks': [max_detections, None, None] float32 tensor of binarized
masks. (Only present if available in `detections`)
'detection_masks': [max_detections, H, W] float32 tensor of binarized
masks, reframed to full image masks.
'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
normalized or absolute coordinates, depending on the value of
`scale_to_absolute`. (Optional)
......@@ -481,14 +507,17 @@ def result_dict_for_single_example(image,
if detection_fields.detection_masks in detections:
detection_masks = detections[detection_fields.detection_masks][0]
output_dict[detection_fields.detection_masks] = detection_masks
if scale_to_absolute:
# TODO: This should be done in model's postprocess
# function ideally.
num_detections = tf.to_int32(detections[detection_fields.num_detections][0])
detection_boxes = tf.slice(
detection_boxes, begin=[0, 0], size=[num_detections, -1])
detection_masks = tf.slice(
detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1])
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image_shape[1], image_shape[2])
detection_masks_reframed = tf.to_float(
tf.greater(detection_masks_reframed, 0.5))
detection_masks_reframed = tf.cast(
tf.greater(detection_masks_reframed, 0.5), tf.uint8)
output_dict[detection_fields.detection_masks] = detection_masks_reframed
if detection_fields.detection_keypoints in detections:
detection_keypoints = detections[detection_fields.detection_keypoints][0]
......@@ -500,6 +529,9 @@ def result_dict_for_single_example(image,
absolute_detection_keypoints)
if groundtruth:
if input_data_fields.groundtruth_instance_masks in groundtruth:
groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast(
groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8)
output_dict.update(groundtruth)
if scale_to_absolute:
groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes]
......
......@@ -24,20 +24,31 @@ import tensorflow as tf
from object_detection import eval_util
from object_detection.core import prefetcher
from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.utils import object_detection_evaluation
# A dictionary of metric names to classes that implement the metric. The classes
# in the dictionary must implement
# utils.object_detection_evaluation.DetectionEvaluator interface.
EVAL_METRICS_CLASS_DICT = {
'pascal_voc_metrics':
'pascal_voc_detection_metrics':
object_detection_evaluation.PascalDetectionEvaluator,
'weighted_pascal_voc_metrics':
'weighted_pascal_voc_detection_metrics':
object_detection_evaluation.WeightedPascalDetectionEvaluator,
'open_images_metrics':
object_detection_evaluation.OpenImagesDetectionEvaluator
'pascal_voc_instance_segmentation_metrics':
object_detection_evaluation.PascalInstanceSegmentationEvaluator,
'weighted_pascal_voc_instance_segmentation_metrics':
object_detection_evaluation.WeightedPascalInstanceSegmentationEvaluator,
'open_images_detection_metrics':
object_detection_evaluation.OpenImagesDetectionEvaluator,
'coco_detection_metrics':
coco_evaluation.CocoDetectionEvaluator,
'coco_mask_metrics':
coco_evaluation.CocoMaskEvaluator,
}
EVAL_DEFAULT_METRIC = 'pascal_voc_detection_metrics'
def _extract_prediction_tensors(model,
create_input_dict_fn,
......@@ -56,9 +67,10 @@ def _extract_prediction_tensors(model,
prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
input_dict = prefetch_queue.dequeue()
original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
preprocessed_image = model.preprocess(tf.to_float(original_image))
prediction_dict = model.predict(preprocessed_image)
detections = model.postprocess(prediction_dict)
preprocessed_image, true_image_shapes = model.preprocess(
tf.to_float(original_image))
prediction_dict = model.predict(preprocessed_image, true_image_shapes)
detections = model.postprocess(prediction_dict, true_image_shapes)
groundtruth = None
if not ignore_groundtruth:
......@@ -103,17 +115,20 @@ def get_evaluators(eval_config, categories):
Raises:
ValueError: if metric is not in the metric class dictionary.
"""
eval_metric_fn_key = eval_config.metrics_set
eval_metric_fn_keys = eval_config.metrics_set
if not eval_metric_fn_keys:
eval_metric_fn_keys = [EVAL_DEFAULT_METRIC]
evaluators_list = []
for eval_metric_fn_key in eval_metric_fn_keys:
if eval_metric_fn_key not in EVAL_METRICS_CLASS_DICT:
raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
return [
EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](
categories=categories)
]
evaluators_list.append(
EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](categories=categories))
return evaluators_list
def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
checkpoint_dir, eval_dir):
checkpoint_dir, eval_dir, graph_hook_fn=None):
"""Evaluation function for detection models.
Args:
......@@ -124,6 +139,10 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
have an integer 'id' field and string 'name' field.
checkpoint_dir: directory to load the checkpoints to evaluate from.
eval_dir: directory to write evaluation metrics summary to.
graph_hook_fn: Optional function that is called after the training graph is
completely built. This is helpful to perform additional changes to the
training graph such as optimizing batchnorm. The function should modify
the default graph.
Returns:
metrics: A dictionary containing metric names and values from the latest
......@@ -177,12 +196,23 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
categories=categories,
summary_dir=eval_dir,
export_dir=eval_config.visualization_export_dir,
show_groundtruth=eval_config.visualization_export_dir)
show_groundtruth=eval_config.visualize_groundtruth_boxes,
groundtruth_box_visualization_color=eval_config.groundtruth_box_visualization_color,
min_score_thresh=eval_config.min_score_threshold,
max_num_predictions=eval_config.max_num_boxes_to_visualize,
skip_scores=eval_config.skip_scores,
skip_labels=eval_config.skip_labels,
keep_image_id_for_visualization_export=eval_config.keep_image_id_for_visualization_export)
return result_dict
variables_to_restore = tf.global_variables()
global_step = tf.train.get_or_create_global_step()
variables_to_restore.append(global_step)
if graph_hook_fn: graph_hook_fn()
if eval_config.use_moving_averages:
variable_averages = tf.train.ExponentialMovingAverage(0.0)
variables_to_restore = variable_averages.variables_to_restore()
......
......@@ -65,6 +65,31 @@ with contents:
- model.ckpt.meta
- frozen_inference_graph.pb
+ saved_model (a directory)
Config overrides (see the `config_override` flag) are text protobufs
(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
certain fields in the provided pipeline_config_path. These are useful for
making small changes to the inference graph that differ from the training or
eval config.
Example Usage (in which we change the second stage post-processing score
threshold to be 0.5):
python export_inference_graph \
--input_type image_tensor \
--pipeline_config_path path/to/ssd_inception_v2.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory \
--config_override " \
model{ \
faster_rcnn { \
second_stage_post_processing { \
batch_non_max_suppression { \
score_threshold: 0.5 \
} \
} \
} \
}"
"""
import tensorflow as tf
from google.protobuf import text_format
......@@ -92,7 +117,9 @@ flags.DEFINE_string('trained_checkpoint_prefix', None,
'Path to trained checkpoint, typically of the form '
'path/to/model.ckpt')
flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
flags.DEFINE_string('config_override', '',
'pipeline_pb2.TrainEvalPipelineConfig '
'text proto to override pipeline_config_path.')
tf.app.flags.mark_flag_as_required('pipeline_config_path')
tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
tf.app.flags.mark_flag_as_required('output_directory')
......@@ -103,6 +130,7 @@ def main(_):
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
text_format.Merge(FLAGS.config_override, pipeline_config)
if FLAGS.input_shape:
input_shape = [
int(dim) if dim != '-1' else None
......
......@@ -18,7 +18,8 @@ import logging
import os
import tempfile
import tensorflow as tf
from tensorflow.core.protobuf import rewriter_config_pb2
from google.protobuf import text_format
from tensorflow.core.protobuf import saver_pb2
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.client import session
from tensorflow.python.framework import graph_util
......@@ -43,7 +44,6 @@ def freeze_graph_with_def_protos(
filename_tensor_name,
clear_devices,
initializer_nodes,
optimize_graph=True,
variable_names_blacklist=''):
"""Converts all variables in a graph and checkpoint into constants."""
del restore_op_name, filename_tensor_name # Unused by updated loading code.
......@@ -65,20 +65,7 @@ def freeze_graph_with_def_protos(
with tf.Graph().as_default():
tf.import_graph_def(input_graph_def, name='')
if optimize_graph:
logging.info('Graph Rewriter optimizations enabled')
rewrite_options = rewriter_config_pb2.RewriterConfig(
layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
rewrite_options.optimizers.append('pruning')
rewrite_options.optimizers.append('constfold')
rewrite_options.optimizers.append('layout')
graph_options = tf.GraphOptions(
rewrite_options=rewrite_options, infer_shapes=True)
else:
logging.info('Graph Rewriter optimizations disabled')
graph_options = tf.GraphOptions()
config = tf.ConfigProto(graph_options=graph_options)
config = tf.ConfigProto(graph_options=tf.GraphOptions())
with session.Session(config=config) as sess:
if input_saver_def:
saver = saver_lib.Saver(saver_def=input_saver_def)
......@@ -227,23 +214,31 @@ def _add_output_tensor_nodes(postprocessed_tensors,
Returns:
A tensor dict containing the added output tensor nodes.
"""
detection_fields = fields.DetectionResultFields
label_id_offset = 1
boxes = postprocessed_tensors.get('detection_boxes')
scores = postprocessed_tensors.get('detection_scores')
classes = postprocessed_tensors.get('detection_classes') + label_id_offset
masks = postprocessed_tensors.get('detection_masks')
num_detections = postprocessed_tensors.get('num_detections')
boxes = postprocessed_tensors.get(detection_fields.detection_boxes)
scores = postprocessed_tensors.get(detection_fields.detection_scores)
classes = postprocessed_tensors.get(
detection_fields.detection_classes) + label_id_offset
masks = postprocessed_tensors.get(detection_fields.detection_masks)
num_detections = postprocessed_tensors.get(detection_fields.num_detections)
outputs = {}
outputs['detection_boxes'] = tf.identity(boxes, name='detection_boxes')
outputs['detection_scores'] = tf.identity(scores, name='detection_scores')
outputs['detection_classes'] = tf.identity(classes, name='detection_classes')
outputs['num_detections'] = tf.identity(num_detections, name='num_detections')
outputs[detection_fields.detection_boxes] = tf.identity(
boxes, name=detection_fields.detection_boxes)
outputs[detection_fields.detection_scores] = tf.identity(
scores, name=detection_fields.detection_scores)
outputs[detection_fields.detection_classes] = tf.identity(
classes, name=detection_fields.detection_classes)
outputs[detection_fields.num_detections] = tf.identity(
num_detections, name=detection_fields.num_detections)
if masks is not None:
outputs['detection_masks'] = tf.identity(masks, name='detection_masks')
outputs[detection_fields.detection_masks] = tf.identity(
masks, name=detection_fields.detection_masks)
for output_key in outputs:
tf.add_to_collection(output_collection_name, outputs[output_key])
if masks is not None:
tf.add_to_collection(output_collection_name, outputs['detection_masks'])
tf.add_to_collection(output_collection_name,
outputs[detection_fields.detection_masks])
return outputs
......@@ -328,8 +323,8 @@ def _export_inference_graph(input_type,
output_directory,
additional_output_tensor_names=None,
input_shape=None,
optimize_graph=True,
output_collection_name='inference_op'):
output_collection_name='inference_op',
graph_hook_fn=None):
"""Export helper."""
tf.gfile.MakeDirs(output_directory)
frozen_graph_path = os.path.join(output_directory,
......@@ -348,24 +343,34 @@ def _export_inference_graph(input_type,
placeholder_tensor, input_tensors = input_placeholder_fn_map[input_type](
**placeholder_args)
inputs = tf.to_float(input_tensors)
preprocessed_inputs = detection_model.preprocess(inputs)
output_tensors = detection_model.predict(preprocessed_inputs)
postprocessed_tensors = detection_model.postprocess(output_tensors)
preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs)
output_tensors = detection_model.predict(
preprocessed_inputs, true_image_shapes)
postprocessed_tensors = detection_model.postprocess(
output_tensors, true_image_shapes)
outputs = _add_output_tensor_nodes(postprocessed_tensors,
output_collection_name)
# Add global step to the graph.
slim.get_or_create_global_step()
if graph_hook_fn: graph_hook_fn()
saver_kwargs = {}
if use_moving_averages:
temp_checkpoint_file = tempfile.NamedTemporaryFile()
# This check is to be compatible with both version of SaverDef.
if os.path.isfile(trained_checkpoint_prefix):
saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
temp_checkpoint_prefix = tempfile.NamedTemporaryFile().name
else:
temp_checkpoint_prefix = tempfile.mkdtemp()
replace_variable_values_with_moving_averages(
tf.get_default_graph(), trained_checkpoint_prefix,
temp_checkpoint_file.name)
checkpoint_to_use = temp_checkpoint_file.name
temp_checkpoint_prefix)
checkpoint_to_use = temp_checkpoint_prefix
else:
checkpoint_to_use = trained_checkpoint_prefix
saver = tf.train.Saver()
saver = tf.train.Saver(**saver_kwargs)
input_saver_def = saver.as_saver_def()
_write_graph_and_checkpoint(
......@@ -387,7 +392,6 @@ def _export_inference_graph(input_type,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
clear_devices=True,
optimize_graph=optimize_graph,
initializer_nodes='')
_write_frozen_graph(frozen_graph_path, frozen_graph_def)
_write_saved_model(saved_model_path, frozen_graph_def,
......@@ -399,7 +403,6 @@ def export_inference_graph(input_type,
trained_checkpoint_prefix,
output_directory,
input_shape=None,
optimize_graph=True,
output_collection_name='inference_op',
additional_output_tensor_names=None):
"""Exports inference graph for the model specified in the pipeline config.
......@@ -412,7 +415,6 @@ def export_inference_graph(input_type,
output_directory: Path to write outputs.
input_shape: Sets a fixed shape for an `image_tensor` input. If not
specified, will default to [None, None, None, 3].
optimize_graph: Whether to optimize graph using Grappler.
output_collection_name: Name of collection to add output tensors to.
If None, does not add output tensors to a collection.
additional_output_tensor_names: list of additional output
......@@ -424,4 +426,10 @@ def export_inference_graph(input_type,
pipeline_config.eval_config.use_moving_averages,
trained_checkpoint_prefix,
output_directory, additional_output_tensor_names,
input_shape, optimize_graph, output_collection_name)
input_shape, output_collection_name,
graph_hook_fn=None)
pipeline_config.eval_config.use_moving_averages = False
config_text = text_format.MessageToString(pipeline_config)
with tf.gfile.Open(
os.path.join(output_directory, 'pipeline.config'), 'wb') as f:
f.write(config_text)
......@@ -18,6 +18,7 @@ import os
import numpy as np
import six
import tensorflow as tf
from google.protobuf import text_format
from object_detection import exporter
from object_detection.builders import model_builder
from object_detection.core import model
......@@ -37,12 +38,13 @@ class FakeModel(model.DetectionModel):
self._add_detection_masks = add_detection_masks
def preprocess(self, inputs):
return tf.identity(inputs)
true_image_shapes = [] # Doesn't matter for the fake model.
return tf.identity(inputs), true_image_shapes
def predict(self, preprocessed_inputs):
def predict(self, preprocessed_inputs, true_image_shapes):
return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)}
def postprocess(self, prediction_dict):
def postprocess(self, prediction_dict, true_image_shapes):
with tf.control_dependencies(prediction_dict.values()):
postprocessed_tensors = {
'detection_boxes': tf.constant([[[0.0, 0.0, 0.5, 0.5],
......@@ -63,7 +65,7 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, from_detection_checkpoint):
pass
def loss(self, prediction_dict):
def loss(self, prediction_dict, true_image_shapes):
pass
......@@ -74,10 +76,10 @@ class ExportInferenceGraphTest(tf.test.TestCase):
g = tf.Graph()
with g.as_default():
mock_model = FakeModel()
preprocessed_inputs = mock_model.preprocess(
preprocessed_inputs, true_image_shapes = mock_model.preprocess(
tf.placeholder(tf.float32, shape=[None, None, None, 3]))
predictions = mock_model.predict(preprocessed_inputs)
mock_model.postprocess(predictions)
predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
mock_model.postprocess(predictions, true_image_shapes)
if use_moving_averages:
tf.train.ExponentialMovingAverage(0.0).apply()
slim.get_or_create_global_step()
......@@ -213,10 +215,10 @@ class ExportInferenceGraphTest(tf.test.TestCase):
graph = tf.Graph()
with graph.as_default():
fake_model = FakeModel()
preprocessed_inputs = fake_model.preprocess(
preprocessed_inputs, true_image_shapes = fake_model.preprocess(
tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3]))
predictions = fake_model.predict(preprocessed_inputs)
fake_model.postprocess(predictions)
predictions = fake_model.predict(preprocessed_inputs, true_image_shapes)
fake_model.postprocess(predictions, true_image_shapes)
exporter.replace_variable_values_with_moving_averages(
graph, trained_checkpoint_prefix, new_checkpoint_prefix)
......@@ -448,7 +450,7 @@ class ExportInferenceGraphTest(tf.test.TestCase):
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
'^TensorArray has inconsistent shapes.'):
'TensorArray.*shape'):
sess.run([boxes, scores, classes, masks, num_detections],
feed_dict={image_str_tensor: image_str_batch_np})
......@@ -495,6 +497,31 @@ class ExportInferenceGraphTest(tf.test.TestCase):
self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
self.assertAllClose(num_detections_np, [2, 1])
def test_export_graph_saves_pipeline_file(self):
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=True)
output_directory = os.path.join(tmp_dir, 'output')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
expected_pipeline_path = os.path.join(
output_directory, 'pipeline.config')
self.assertTrue(os.path.exists(expected_pipeline_path))
written_pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(expected_pipeline_path, 'r') as f:
proto_str = f.read()
text_format.Merge(proto_str, written_pipeline_config)
self.assertProtoEquals(pipeline_config, written_pipeline_config)
def test_export_saved_model_and_run_inference(self):
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
......
......@@ -23,7 +23,7 @@ In the table below, we list each such pre-trained model including:
* detector performance on a subset of the COCO validation set or Open Images test split as measured by the dataset-specific mAP measure.
Here, higher is better, and we only report bounding box mAP rounded to the
nearest integer.
* Output types (currently only `Boxes`)
* Output types (`Boxes`, and `Masks` if applicable)
You can un-tar each tar.gz file via, e.g.,:
......@@ -55,7 +55,7 @@ Some remarks on frozen inference graphs:
a detector (and discarding the part past that point), which negatively impacts
standard mAP metrics.
* Our frozen inference graphs are generated using the
[v1.4.0](https://github.com/tensorflow/tensorflow/tree/v1.4.0)
[v1.5.0](https://github.com/tensorflow/tensorflow/tree/v1.5.0)
release version of Tensorflow and we do not guarantee that these will work
with other versions; this being said, each frozen inference graph can be
regenerated using your current version of Tensorflow by re-running the
......@@ -69,16 +69,20 @@ Some remarks on frozen inference graphs:
| ------------ | :--------------: | :--------------: | :-------------: |
| [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz) | 30 | 21 | Boxes |
| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz) | 42 | 24 | Boxes |
| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2017_11_08.tar.gz) | 58 | 28 | Boxes |
| [faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2017_11_08.tar.gz) | 89 | 30 | Boxes |
| [faster_rcnn_resnet50_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_lowproposals_coco_2017_11_08.tar.gz) | 64 | | Boxes |
| [rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_2017_11_08.tar.gz) | 92 | 30 | Boxes |
| [faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_2017_11_08.tar.gz) | 106 | 32 | Boxes |
| [faster_rcnn_resnet101_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_lowproposals_coco_2017_11_08.tar.gz) | 82 | | Boxes |
| [faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_2017_11_08.tar.gz) | 620 | 37 | Boxes |
| [faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2017_11_08.tar.gz) | 241 | | Boxes |
| [faster_rcnn_nas](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_coco_2017_11_08.tar.gz) | 1833 | 43 | Boxes |
| [faster_rcnn_nas_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_lowproposals_coco_2017_11_08.tar.gz) | 540 | | Boxes |
| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 58 | 28 | Boxes |
| [faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz) | 89 | 30 | Boxes |
| [faster_rcnn_resnet50_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_lowproposals_coco_2018_01_28.tar.gz) | 64 | | Boxes |
| [rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_2018_01_28.tar.gz) | 92 | 30 | Boxes |
| [faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_2018_01_28.tar.gz) | 106 | 32 | Boxes |
| [faster_rcnn_resnet101_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_lowproposals_coco_2018_01_28.tar.gz) | 82 | | Boxes |
| [faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz) | 620 | 37 | Boxes |
| [faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2018_01_28.tar.gz) | 241 | | Boxes |
| [faster_rcnn_nas](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_coco_2018_01_28.tar.gz) | 1833 | 43 | Boxes |
| [faster_rcnn_nas_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_lowproposals_coco_2018_01_28.tar.gz) | 540 | | Boxes |
| [mask_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz) | 771 | 36 | Masks |
| [mask_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 79 | 25 | Masks |
| [mask_rcnn_resnet101_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet101_atrous_coco_2018_01_28.tar.gz) | 470 | 33 | Masks |
| [mask_rcnn_resnet50_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet50_atrous_coco_2018_01_28.tar.gz) | 343 | 29 | Masks |
......@@ -86,14 +90,14 @@ Some remarks on frozen inference graphs:
Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
----------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---: | :-------------: | :-----:
[faster_rcnn_resnet101_kitti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_kitti_2017_11_08.tar.gz) | 79 | 87 | Boxes
[faster_rcnn_resnet101_kitti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_kitti_2018_01_28.tar.gz) | 79 | 87 | Boxes
## Open Images-trained models {#open-images-models}
Model name | Speed (ms) | Open Images mAP@0.5[^2] | Outputs
----------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---: | :-------------: | :-----:
[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2017_11_08.tar.gz) | 727 | 37 | Boxes
[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2017_11_08.tar.gz) | 347 | | Boxes
[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz) | 727 | 37 | Boxes
[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz) | 347 | | Boxes
[^1]: See [MSCOCO evaluation protocol](http://cocodataset.org/#detections-eval).
......
......@@ -4,17 +4,16 @@ The Tensorflow Object Detection API currently supports three evaluation protocol
that can be configured in `EvalConfig` by setting `metrics_set` to the
corresponding value.
## PASCAL VOC 2007 metric
## PASCAL VOC 2007 detection metric
`EvalConfig.metrics_set='pascal_voc_metrics'`
`EvalConfig.metrics_set='pascal_voc_detection_metrics'`
The commonly used mAP metric for evaluating the quality of object detectors, computed according to the protocol of the PASCAL VOC Challenge 2007.
The protocol is available [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/devkit_doc_07-Jun-2007.pdf).
## Weighted PASCAL VOC detection metric
## Weighted PASCAL VOC metric
`EvalConfig.metrics_set='weighted_pascal_voc_metrics'`
`EvalConfig.metrics_set='weighted_pascal_voc_detection_metrics'`
The weighted PASCAL metric computes the mean average precision as the average
precision when treating all classes as a single class. In comparison,
......@@ -25,7 +24,21 @@ For example, the test set consists of two classes, "cat" and "dog", and there ar
According to PASCAL VOC 2007 metric, performance on each of the two classes would contribute equally towards the final mAP value,
while for the Weighted PASCAL VOC metric the final mAP value will be influenced by frequency of each class.
## Open Images metric {#open-images}
## PASCAL VOC 2007 instance segmentation metric
`EvalConfig.metrics_set='pascal_voc_instance_segmentation_metrics'`
Similar to the PASCAL VOC 2007 detection metric, but computes the intersection over
union based on the object masks instead of object boxes.
## Weighted PASCAL VOC instance segmentation metric
`EvalConfig.metrics_set='weighted_pascal_voc_instance_segmentation_metrics'`
Similar to the weighted PASCAL VOC 2007 detection metric, but computes the
intersection over union based on the object masks instead of object boxes.
## Open Images detection metric {#open-images}
`EvalConfig.metrics_set='open_images_metrics'`
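For reference, these metric names are selected through the repeated `metrics_set` field of `EvalConfig`. A minimal sketch of requesting several of them programmatically, assuming the standard `object_detection.protos.eval_pb2` module:

```python
# Sketch: request evaluation metrics by name via EvalConfig.metrics_set.
from object_detection.protos import eval_pb2

eval_config = eval_pb2.EvalConfig()
# metrics_set is repeated, so multiple metrics can be evaluated in one run.
eval_config.metrics_set.append('coco_detection_metrics')
eval_config.metrics_set.append('coco_mask_metrics')

# Equivalent lines in a pipeline config text proto:
#   eval_config {
#     metrics_set: 'coco_detection_metrics'
#     metrics_set: 'coco_mask_metrics'
#   }
```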
......
......@@ -8,7 +8,7 @@ graph proto. A checkpoint will typically consist of three files:
* model.ckpt-${CHECKPOINT_NUMBER}.meta
After you've identified a candidate checkpoint to export, run the following
command from tensorflow/models/research/:
command from tensorflow/models/research:
``` bash
# From tensorflow/models/research/
......
## Run an Instance Segmentation Model
For some applications it isn't sufficient to localize an object with a
simple bounding box. For instance, you might want to segment an object region
once it is detected. This class of problems is called **instance segmentation**.
<p align="center">
<img src="img/kites_with_segment_overlay.png" width=676 height=450>
</p>
### Materializing data for instance segmentation {#materializing-instance-seg}
Instance segmentation is an extension of object detection, where a binary mask
(i.e. object vs. background) is associated with every bounding box. This allows
for more fine-grained information about the extent of the object within the box.
To train an instance segmentation model, a groundtruth mask must be supplied for
every groundtruth bounding box. In addition to the proto fields listed in the
section titled [Using your own dataset](using_your_own_dataset.md), one must
also supply `image/object/mask`, which can either be a repeated list of
single-channel encoded PNG strings, or a single dense 3D binary tensor where
masks corresponding to each object are stacked along the first dimension. Each
is described in more detail below.
#### PNG Instance Segmentation Masks
Instance segmentation masks can be supplied as serialized PNG images.
```shell
image/object/mask = ["\x89PNG\r\n\x1A\n\x00\x00\x00\rIHDR\...", ...]
```
These masks are whole-image masks, one for each object instance. The spatial
dimensions of each mask must agree with the image. Each mask has only a single
channel, and the pixel values are either 0 (background) or 1 (object mask).
**PNG masks are the preferred parameterization since they offer considerable
space savings compared to dense numerical masks.**
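As an illustration of how such a PNG string can be produced (mirroring the `mask_type='png'` branch in `create_pet_tf_record.py` above), here is a short sketch; the small mask array is made up for the example.

```python
# Sketch: encode a binary instance mask as a single-channel PNG string for
# the image/object/mask feature. The 4x6 mask below is made up.
import io

import numpy as np
import PIL.Image

from object_detection.utils import dataset_util

mask = np.zeros((4, 6), dtype=np.uint8)  # 0 = background
mask[1:3, 2:5] = 1                       # 1 = object

output = io.BytesIO()
PIL.Image.fromarray(mask).save(output, format='PNG')
encoded_mask_png = output.getvalue()

feature = dataset_util.bytes_list_feature([encoded_mask_png])
```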
#### Dense Numerical Instance Segmentation Masks
Masks can also be specified via a dense numerical tensor.
```shell
image/object/mask = [0.0, 0.0, 1.0, 1.0, 0.0, ...]
```
For an image with dimensions `H` x `W` and `num_boxes` groundtruth boxes, the
mask corresponds to a [`num_boxes`, `H`, `W`] float32 tensor, flattened into a
single vector of shape `num_boxes` * `H` * `W`. In TensorFlow, examples are read
in row-major format, so the elements are organized as:
```shell
... mask 0 row 0 ... mask 0 row 1 ... // ... mask 0 row H-1 ... mask 1 row 0 ...
```
where each row has W contiguous binary values.
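To make the layout concrete, the following sketch (with two made-up 2x3 masks) produces the flattened float list described above, following the `mask_type='numerical'` branch in `create_pet_tf_record.py`:

```python
# Sketch: flatten [num_boxes, H, W] binary masks into the dense float list
# stored under image/object/mask. The two 2x3 masks here are made up.
import numpy as np

from object_detection.utils import dataset_util

masks = [
    np.array([[0, 1, 1],
              [0, 0, 1]], dtype=np.uint8),
    np.array([[1, 1, 0],
              [1, 0, 0]], dtype=np.uint8),
]

# Stack to [num_boxes, H, W], then flatten row-major:
# mask 0 row 0, mask 0 row 1, ..., mask 1 row 0, ...
mask_stack = np.stack(masks).astype(np.float32)
masks_flattened = np.reshape(mask_stack, [-1])

feature = dataset_util.float_list_feature(masks_flattened.tolist())
```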
To see an example tf-records with mask labels, see the examples under the
[Preparing Inputs](preparing_inputs.md) section.
### Pre-existing config files
We provide four instance segmentation config files that you can use to train
your own models:
1. <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/samples/configs/mask_rcnn_inception_resnet_v2_atrous_coco.config" target=_blank>mask_rcnn_inception_resnet_v2_atrous_coco</a>
1. <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/samples/configs/mask_rcnn_resnet101_atrous_coco.config" target=_blank>mask_rcnn_resnet101_atrous_coco</a>
1. <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/samples/configs/mask_rcnn_resnet50_atrous_coco.config" target=_blank>mask_rcnn_resnet50_atrous_coco</a>
1. <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/samples/configs/mask_rcnn_inception_v2_coco.config" target=_blank>mask_rcnn_inception_v2_coco</a>
For more details see the [detection model zoo](detection_model_zoo.md).
### Updating a Faster R-CNN config file
Currently, the only supported instance segmentation model is [Mask
R-CNN](https://arxiv.org/abs/1703.06870), which requires Faster R-CNN as the
backbone object detector.
Once you have a baseline Faster R-CNN pipeline configuration, you can make the
following modifications in order to convert it into a Mask R-CNN model (a
programmatic sketch of these edits follows the list):
1. Within `train_input_reader` and `eval_input_reader`, set
`load_instance_masks` to `True`. If using PNG masks, set `mask_type` to
`PNG_MASKS`, otherwise you can leave it as the default `NUMERICAL_MASKS`.
1. Within the `faster_rcnn` config, use a `MaskRCNNBoxPredictor` as the
`second_stage_box_predictor`.
1. Within the `MaskRCNNBoxPredictor` message, set `predict_instance_masks` to
`True`. You must also define `conv_hyperparams`.
1. Within the `faster_rcnn` message, set `number_of_stages` to `3`.
1. Add instance segmentation metrics to the set of metrics:
`'coco_mask_metrics'`.
1. Update the `input_path`s to point at your data.
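A possible sketch of applying these modifications programmatically is shown below. The config paths are placeholders, and the proto accessors (`input_reader_pb2.PNG_MASKS`, `mask_rcnn_box_predictor`, `number_of_stages`) are assumptions based on the standard object_detection protos rather than something this page defines.

```python
# Sketch: convert a baseline Faster R-CNN pipeline config into a Mask R-CNN
# config following the steps above. Paths are placeholders.
import tensorflow as tf
from google.protobuf import text_format

from object_detection.protos import input_reader_pb2
from object_detection.protos import pipeline_pb2

pipeline = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile('path/to/faster_rcnn_baseline.config', 'r') as f:
  text_format.Merge(f.read(), pipeline)

# 1. Load instance masks (stored as PNGs) in both input readers.
for reader in (pipeline.train_input_reader, pipeline.eval_input_reader):
  reader.load_instance_masks = True
  reader.mask_type = input_reader_pb2.PNG_MASKS

# 2./3. Use a MaskRCNNBoxPredictor that predicts instance masks.
#       (conv_hyperparams must also be defined; omitted here.)
predictor = pipeline.model.faster_rcnn.second_stage_box_predictor
predictor.mask_rcnn_box_predictor.predict_instance_masks = True

# 4. Enable the third (mask) stage.
pipeline.model.faster_rcnn.number_of_stages = 3

# 5. Add instance segmentation metrics.
pipeline.eval_config.metrics_set.append('coco_mask_metrics')

with tf.gfile.GFile('path/to/mask_rcnn_pipeline.config', 'w') as f:
  f.write(text_format.MessageToString(pipeline))
```

The same edits can of course be made by hand in the pipeline config text file, as described in the list above.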
Please refer to the section on [Running the pets dataset](running_pets.md) for
additional details.
> Note: The mask prediction branch consists of a sequence of convolution layers.
> You can set the number of convolution layers and their depth as follows:
>
> 1. Within the `MaskRCNNBoxPredictor` message, set the
> `mask_prediction_conv_depth` to your value of interest. The default value
> is 256. If you set it to `0` (recommended), the depth is computed
> automatically based on the number of classes in the dataset.
> 1. Within the `MaskRCNNBoxPredictor` message, set the
> `mask_prediction_num_conv_layers` to your value of interest. The default
> value is 2.
......@@ -308,6 +308,21 @@ python object_detection/export_inference_graph.py \
Afterwards, you should see a directory named `exported_graphs` containing the
SavedModel and frozen graph.
## Configuring the Instance Segmentation Pipeline
Mask prediction can be turned on for an object detection config by adding
`predict_instance_masks: true` within the `MaskRCNNBoxPredictor`. Other
parameters, such as the mask size, the number of convolutions in the mask layer,
and the convolution hyperparameters, can also be defined. We will use
`mask_rcnn_resnet101_pets.config` as a starting point for configuring the
instance segmentation pipeline. Everything mentioned above about object
detection also holds true for instance segmentation. Setting training and other
details aside, an instance segmentation model is simply an object detection
model with an additional head that predicts the object mask inside each
predicted box.
Please refer to the section on [Running an Instance Segmentation
Model](instance_segmentation.md) for instructions on how to configure a model
that predicts masks in addition to object bounding boxes.
## What's Next
Congratulations, you have now trained an object detector for various cats and
......
......@@ -103,7 +103,7 @@ FLAGS = flags.FLAGS
def create_tf_example(example):
# TODO(user): Populate the following variables from your example.
# TODO: Populate the following variables from your example.
height = None # Image height
width = None # Image width
filename = None # Filename of the image. Empty if image is not from file
......@@ -139,7 +139,7 @@ def create_tf_example(example):
def main(_):
writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
# TODO(user): Write code to read in your dataset to examples variable
# TODO: Write code to read in your dataset to examples variable
for example in examples:
tf_example = create_tf_example(example)
......@@ -155,3 +155,7 @@ if __name__ == '__main__':
Note: You may notice additional fields in some other datasets. They are
currently unused by the API and are optional.
Note: Please refer to the section on [Running an Instance Segmentation
Model](instance_segmentation.md) for instructions on how to configure a model
that predicts masks in addition to object bounding boxes.
......@@ -13,7 +13,7 @@ py_library(
srcs = ["detection_inference.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
......@@ -22,11 +22,11 @@ py_test(
srcs = ["detection_inference_test.py"],
deps = [
":detection_inference",
"//third_party/py/PIL:pil",
"//third_party/py/numpy",
"//PIL:pil",
"//numpy",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow/models/research/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/utils:dataset_util",
],
)
......
......@@ -17,6 +17,7 @@ r"""Tests for detection_inference.py."""
import os
import StringIO
import numpy as np
from PIL import Image
import tensorflow as tf
......