Commit 44fa1d37 authored by Alex Lee

Merge remote-tracking branch 'upstream/master'

parents d3628a74 6e367f67
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_pascal_tf_record.py."""
import os
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection import create_pascal_tf_record
class DictToTFExampleTest(tf.test.TestCase):
def _assertProtoEqual(self, proto_field, expectation):
"""Helper function to assert if a proto field equals some value.
Args:
proto_field: The protobuf field to compare.
expectation: The expected value of the protobuf field.
"""
proto_list = [p for p in proto_field]
self.assertListEqual(proto_list, expectation)
def test_dict_to_tf_example(self):
image_file_name = 'tmp_image.jpg'
image_data = np.random.randint(255, size=(256, 256, 3)).astype(np.uint8)
save_path = os.path.join(self.get_temp_dir(), image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
data = {
'folder': '',
'filename': image_file_name,
'size': {
'height': 256,
'width': 256,
},
'object': [
{
'difficult': 1,
'bndbox': {
'xmin': 64,
'ymin': 64,
'xmax': 192,
'ymax': 192,
},
'name': 'person',
'truncated': 0,
'pose': '',
},
],
}
label_map_dict = {
'background': 0,
'person': 1,
'notperson': 2,
}
example = create_pascal_tf_record.dict_to_tf_example(
data, self.get_temp_dir(), label_map_dict, image_subdirectory='')
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[image_file_name])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[image_file_name])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value, ['jpeg'])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
['person'])
self._assertProtoEqual(
example.features.feature['image/object/class/label'].int64_list.value,
[1])
self._assertProtoEqual(
example.features.feature['image/object/difficult'].int64_list.value,
[1])
self._assertProtoEqual(
example.features.feature['image/object/truncated'].int64_list.value,
[0])
self._assertProtoEqual(
example.features.feature['image/object/view'].bytes_list.value, [''])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert the Oxford pet dataset to TFRecord for object_detection.
See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar
Cats and Dogs
IEEE Conference on Computer Vision and Pattern Recognition, 2012
http://www.robots.ox.ac.uk/~vgg/data/pets/
Example usage:
./create_pet_tf_record --data_dir=/home/user/pet \
--output_dir=/home/user/pet/output
"""
import hashlib
import io
import logging
import os
import random
import re
from lxml import etree
import PIL.Image
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.')
flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt',
'Path to label map proto')
FLAGS = flags.FLAGS
def get_class_name_from_filename(file_name):
"""Gets the class name from a file.
Args:
file_name: The file name to get the class name from.
ie. "american_pit_bull_terrier_105.jpg"
Returns:
The class name parsed from the file name, e.g. "american_pit_bull_terrier".
"""
match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
return match.groups()[0]
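# A minimal sketch (not part of the original file) showing what the regex above
# extracts from the docstring's example file name; runnable on its own.
#
#   import re
#   match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)',
#                    'american_pit_bull_terrier_105.jpg', re.I)
#   assert match.groups()[0] == 'american_pit_bull_terrier'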
def dict_to_tf_example(data,
label_map_dict,
image_subdirectory,
ignore_difficult_instances=False):
"""Convert XML derived dict to tf.Example proto.
Notice that this function normalizes the bounding box coordinates provided
by the raw data.
Args:
data: dict holding PASCAL XML fields for a single image (obtained by
running dataset_util.recursive_parse_xml_to_dict)
label_map_dict: A map from string label names to integers ids.
image_subdirectory: String specifying subdirectory within the
Pet dataset directory holding the actual image data.
ignore_difficult_instances: Whether to skip difficult instances in the
dataset (default: False).
Returns:
example: The converted tf.Example.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
"""
img_path = os.path.join(image_subdirectory, data['filename'])
with tf.gfile.GFile(img_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
if image.format != 'JPEG':
raise ValueError('Image format not JPEG')
key = hashlib.sha256(encoded_jpg).hexdigest()
width = int(data['size']['width'])
height = int(data['size']['height'])
xmin = []
ymin = []
xmax = []
ymax = []
classes = []
classes_text = []
truncated = []
poses = []
difficult_obj = []
for obj in data['object']:
difficult = bool(int(obj['difficult']))
if ignore_difficult_instances and difficult:
continue
difficult_obj.append(int(difficult))
xmin.append(float(obj['bndbox']['xmin']) / width)
ymin.append(float(obj['bndbox']['ymin']) / height)
xmax.append(float(obj['bndbox']['xmax']) / width)
ymax.append(float(obj['bndbox']['ymax']) / height)
class_name = get_class_name_from_filename(data['filename'])
classes_text.append(class_name.encode('utf8'))
classes.append(label_map_dict[class_name])
truncated.append(int(obj['truncated']))
poses.append(obj['pose'].encode('utf8'))
example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(
data['filename'].encode('utf8')),
'image/source_id': dataset_util.bytes_feature(
data['filename'].encode('utf8')),
'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
'image/object/truncated': dataset_util.int64_list_feature(truncated),
'image/object/view': dataset_util.bytes_list_feature(poses),
}))
return example
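# A small worked check of the normalization above (a sketch, not original code):
# a box with xmin=64 on a 256-pixel-wide image is stored as 64 / 256 = 0.25,
# matching the expectations in the create_pascal_tf_record test earlier in this
# commit, which uses the same normalization.
#
#   width, height = 256, 256
#   bndbox = {'xmin': 64, 'ymin': 64, 'xmax': 192, 'ymax': 192}
#   assert float(bndbox['xmin']) / width == 0.25
#   assert float(bndbox['ymax']) / height == 0.75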
def create_tf_record(output_filename,
label_map_dict,
annotations_dir,
image_dir,
examples):
"""Creates a TFRecord file from examples.
Args:
output_filename: Path to where output file is saved.
label_map_dict: The label map dictionary.
annotations_dir: Directory where annotation files are stored.
image_dir: Directory where image files are stored.
examples: Examples to parse and save to tf record.
"""
writer = tf.python_io.TFRecordWriter(output_filename)
for idx, example in enumerate(examples):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(examples))
path = os.path.join(annotations_dir, 'xmls', example + '.xml')
if not os.path.exists(path):
logging.warning('Could not find %s, ignoring example.', path)
continue
with tf.gfile.GFile(path, 'r') as fid:
xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
writer.write(tf_example.SerializeToString())
writer.close()
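# Hedged usage sketch for create_tf_record (the paths below are hypothetical,
# not part of this commit); it mirrors the calls made in main() further down.
#
#   label_map_dict = label_map_util.get_label_map_dict('data/pet_label_map.pbtxt')
#   examples = dataset_util.read_examples_list('/tmp/pet/annotations/trainval.txt')
#   create_tf_record('/tmp/pet_train.record', label_map_dict,
#                    '/tmp/pet/annotations', '/tmp/pet/images', examples)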
# TODO: Add test for pet/PASCAL main files.
def main(_):
data_dir = FLAGS.data_dir
label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
logging.info('Reading from Pet dataset.')
image_dir = os.path.join(data_dir, 'images')
annotations_dir = os.path.join(data_dir, 'annotations')
examples_path = os.path.join(annotations_dir, 'trainval.txt')
examples_list = dataset_util.read_examples_list(examples_path)
# Test images are not included in the downloaded data set, so we shall perform
# our own split.
random.seed(42)
random.shuffle(examples_list)
num_examples = len(examples_list)
num_train = int(0.7 * num_examples)
train_examples = examples_list[:num_train]
val_examples = examples_list[num_train:]
logging.info('%d training and %d validation examples.',
len(train_examples), len(val_examples))
train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
create_tf_record(train_output_path, label_map_dict, annotations_dir,
image_dir, train_examples)
create_tf_record(val_output_path, label_map_dict, annotations_dir,
image_dir, val_examples)
if __name__ == '__main__':
tf.app.run()
item {
name: "/m/01g317"
id: 1
display_name: "person"
}
item {
name: "/m/0199g"
id: 2
display_name: "bicycle"
}
item {
name: "/m/0k4j"
id: 3
display_name: "car"
}
item {
name: "/m/04_sv"
id: 4
display_name: "motorcycle"
}
item {
name: "/m/05czz6l"
id: 5
display_name: "airplane"
}
item {
name: "/m/01bjv"
id: 6
display_name: "bus"
}
item {
name: "/m/07jdr"
id: 7
display_name: "train"
}
item {
name: "/m/07r04"
id: 8
display_name: "truck"
}
item {
name: "/m/019jd"
id: 9
display_name: "boat"
}
item {
name: "/m/015qff"
id: 10
display_name: "traffic light"
}
item {
name: "/m/01pns0"
id: 11
display_name: "fire hydrant"
}
item {
name: "/m/02pv19"
id: 13
display_name: "stop sign"
}
item {
name: "/m/015qbp"
id: 14
display_name: "parking meter"
}
item {
name: "/m/0cvnqh"
id: 15
display_name: "bench"
}
item {
name: "/m/015p6"
id: 16
display_name: "bird"
}
item {
name: "/m/01yrx"
id: 17
display_name: "cat"
}
item {
name: "/m/0bt9lr"
id: 18
display_name: "dog"
}
item {
name: "/m/03k3r"
id: 19
display_name: "horse"
}
item {
name: "/m/07bgp"
id: 20
display_name: "sheep"
}
item {
name: "/m/01xq0k1"
id: 21
display_name: "cow"
}
item {
name: "/m/0bwd_0j"
id: 22
display_name: "elephant"
}
item {
name: "/m/01dws"
id: 23
display_name: "bear"
}
item {
name: "/m/0898b"
id: 24
display_name: "zebra"
}
item {
name: "/m/03bk1"
id: 25
display_name: "giraffe"
}
item {
name: "/m/01940j"
id: 27
display_name: "backpack"
}
item {
name: "/m/0hnnb"
id: 28
display_name: "umbrella"
}
item {
name: "/m/080hkjn"
id: 31
display_name: "handbag"
}
item {
name: "/m/01rkbr"
id: 32
display_name: "tie"
}
item {
name: "/m/01s55n"
id: 33
display_name: "suitcase"
}
item {
name: "/m/02wmf"
id: 34
display_name: "frisbee"
}
item {
name: "/m/071p9"
id: 35
display_name: "skis"
}
item {
name: "/m/06__v"
id: 36
display_name: "snowboard"
}
item {
name: "/m/018xm"
id: 37
display_name: "sports ball"
}
item {
name: "/m/02zt3"
id: 38
display_name: "kite"
}
item {
name: "/m/03g8mr"
id: 39
display_name: "baseball bat"
}
item {
name: "/m/03grzl"
id: 40
display_name: "baseball glove"
}
item {
name: "/m/06_fw"
id: 41
display_name: "skateboard"
}
item {
name: "/m/019w40"
id: 42
display_name: "surfboard"
}
item {
name: "/m/0dv9c"
id: 43
display_name: "tennis racket"
}
item {
name: "/m/04dr76w"
id: 44
display_name: "bottle"
}
item {
name: "/m/09tvcd"
id: 46
display_name: "wine glass"
}
item {
name: "/m/08gqpm"
id: 47
display_name: "cup"
}
item {
name: "/m/0dt3t"
id: 48
display_name: "fork"
}
item {
name: "/m/04ctx"
id: 49
display_name: "knife"
}
item {
name: "/m/0cmx8"
id: 50
display_name: "spoon"
}
item {
name: "/m/04kkgm"
id: 51
display_name: "bowl"
}
item {
name: "/m/09qck"
id: 52
display_name: "banana"
}
item {
name: "/m/014j1m"
id: 53
display_name: "apple"
}
item {
name: "/m/0l515"
id: 54
display_name: "sandwich"
}
item {
name: "/m/0cyhj_"
id: 55
display_name: "orange"
}
item {
name: "/m/0hkxq"
id: 56
display_name: "broccoli"
}
item {
name: "/m/0fj52s"
id: 57
display_name: "carrot"
}
item {
name: "/m/01b9xk"
id: 58
display_name: "hot dog"
}
item {
name: "/m/0663v"
id: 59
display_name: "pizza"
}
item {
name: "/m/0jy4k"
id: 60
display_name: "donut"
}
item {
name: "/m/0fszt"
id: 61
display_name: "cake"
}
item {
name: "/m/01mzpv"
id: 62
display_name: "chair"
}
item {
name: "/m/02crq1"
id: 63
display_name: "couch"
}
item {
name: "/m/03fp41"
id: 64
display_name: "potted plant"
}
item {
name: "/m/03ssj5"
id: 65
display_name: "bed"
}
item {
name: "/m/04bcr3"
id: 67
display_name: "dining table"
}
item {
name: "/m/09g1w"
id: 70
display_name: "toilet"
}
item {
name: "/m/07c52"
id: 72
display_name: "tv"
}
item {
name: "/m/01c648"
id: 73
display_name: "laptop"
}
item {
name: "/m/020lf"
id: 74
display_name: "mouse"
}
item {
name: "/m/0qjjc"
id: 75
display_name: "remote"
}
item {
name: "/m/01m2v"
id: 76
display_name: "keyboard"
}
item {
name: "/m/050k8"
id: 77
display_name: "cell phone"
}
item {
name: "/m/0fx9l"
id: 78
display_name: "microwave"
}
item {
name: "/m/029bxz"
id: 79
display_name: "oven"
}
item {
name: "/m/01k6s3"
id: 80
display_name: "toaster"
}
item {
name: "/m/0130jx"
id: 81
display_name: "sink"
}
item {
name: "/m/040b_t"
id: 82
display_name: "refrigerator"
}
item {
name: "/m/0bt_c3"
id: 84
display_name: "book"
}
item {
name: "/m/01x3z"
id: 85
display_name: "clock"
}
item {
name: "/m/02s195"
id: 86
display_name: "vase"
}
item {
name: "/m/01lsmm"
id: 87
display_name: "scissors"
}
item {
name: "/m/0kmg4"
id: 88
display_name: "teddy bear"
}
item {
name: "/m/03wvsk"
id: 89
display_name: "hair drier"
}
item {
name: "/m/012xff"
id: 90
display_name: "toothbrush"
}
item {
id: 0
name: 'none_of_the_above'
}
item {
id: 1
name: 'aeroplane'
}
item {
id: 2
name: 'bicycle'
}
item {
id: 3
name: 'bird'
}
item {
id: 4
name: 'boat'
}
item {
id: 5
name: 'bottle'
}
item {
id: 6
name: 'bus'
}
item {
id: 7
name: 'car'
}
item {
id: 8
name: 'cat'
}
item {
id: 9
name: 'chair'
}
item {
id: 10
name: 'cow'
}
item {
id: 11
name: 'diningtable'
}
item {
id: 12
name: 'dog'
}
item {
id: 13
name: 'horse'
}
item {
id: 14
name: 'motorbike'
}
item {
id: 15
name: 'person'
}
item {
id: 16
name: 'pottedplant'
}
item {
id: 17
name: 'sheep'
}
item {
id: 18
name: 'sofa'
}
item {
id: 19
name: 'train'
}
item {
id: 20
name: 'tvmonitor'
}
item {
id: 0
name: 'none_of_the_above'
}
item {
id: 1
name: 'Abyssinian'
}
item {
id: 2
name: 'american_bulldog'
}
item {
id: 3
name: 'american_pit_bull_terrier'
}
item {
id: 4
name: 'basset_hound'
}
item {
id: 5
name: 'beagle'
}
item {
id: 6
name: 'Bengal'
}
item {
id: 7
name: 'Birman'
}
item {
id: 8
name: 'Bombay'
}
item {
id: 9
name: 'boxer'
}
item {
id: 10
name: 'British_Shorthair'
}
item {
id: 11
name: 'chihuahua'
}
item {
id: 12
name: 'Egyptian_Mau'
}
item {
id: 13
name: 'english_cocker_spaniel'
}
item {
id: 14
name: 'english_setter'
}
item {
id: 15
name: 'german_shorthaired'
}
item {
id: 16
name: 'great_pyrenees'
}
item {
id: 17
name: 'havanese'
}
item {
id: 18
name: 'japanese_chin'
}
item {
id: 19
name: 'keeshond'
}
item {
id: 20
name: 'leonberger'
}
item {
id: 21
name: 'Maine_Coon'
}
item {
id: 22
name: 'miniature_pinscher'
}
item {
id: 23
name: 'newfoundland'
}
item {
id: 24
name: 'Persian'
}
item {
id: 25
name: 'pomeranian'
}
item {
id: 26
name: 'pug'
}
item {
id: 27
name: 'Ragdoll'
}
item {
id: 28
name: 'Russian_Blue'
}
item {
id: 29
name: 'saint_bernard'
}
item {
id: 30
name: 'samoyed'
}
item {
id: 31
name: 'scottish_terrier'
}
item {
id: 32
name: 'shiba_inu'
}
item {
id: 33
name: 'Siamese'
}
item {
id: 34
name: 'Sphynx'
}
item {
id: 35
name: 'staffordshire_bull_terrier'
}
item {
id: 36
name: 'wheaten_terrier'
}
item {
id: 37
name: 'yorkshire_terrier'
}
# Tensorflow Object Detection API: data decoders.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "tf_example_decoder",
srcs = ["tf_example_decoder.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:data_decoder",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_test(
name = "tf_example_decoder_test",
srcs = ["tf_example_decoder_test.py"],
deps = [
":tf_example_decoder",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow Example proto decoder for object detection.
A decoder to decode string tensors containing serialized tensorflow.Example
protos for object detection.
"""
import tensorflow as tf
from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
slim_example_decoder = tf.contrib.slim.tfexample_decoder
class TfExampleDecoder(data_decoder.DataDecoder):
"""Tensorflow Example proto decoder."""
def __init__(self):
"""Constructor sets keys_to_features and items_to_handlers."""
self.keys_to_features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
'image/key/sha256': tf.FixedLenFeature((), tf.string, default_value=''),
'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''),
'image/height': tf.FixedLenFeature((), tf.int64, 1),
'image/width': tf.FixedLenFeature((), tf.int64, 1),
# Object boxes and classes.
'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
'image/object/class/label': tf.VarLenFeature(tf.int64),
'image/object/area': tf.VarLenFeature(tf.float32),
'image/object/is_crowd': tf.VarLenFeature(tf.int64),
'image/object/difficult': tf.VarLenFeature(tf.int64),
# Instance masks and classes.
'image/segmentation/object': tf.VarLenFeature(tf.int64),
'image/segmentation/object/class': tf.VarLenFeature(tf.int64)
}
self.items_to_handlers = {
fields.InputDataFields.image: slim_example_decoder.Image(
image_key='image/encoded', format_key='image/format', channels=3),
fields.InputDataFields.source_id: (
slim_example_decoder.Tensor('image/source_id')),
fields.InputDataFields.key: (
slim_example_decoder.Tensor('image/key/sha256')),
fields.InputDataFields.filename: (
slim_example_decoder.Tensor('image/filename')),
# Object boxes and classes.
fields.InputDataFields.groundtruth_boxes: (
slim_example_decoder.BoundingBox(
['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
fields.InputDataFields.groundtruth_classes: (
slim_example_decoder.Tensor('image/object/class/label')),
fields.InputDataFields.groundtruth_area: slim_example_decoder.Tensor(
'image/object/area'),
fields.InputDataFields.groundtruth_is_crowd: (
slim_example_decoder.Tensor('image/object/is_crowd')),
fields.InputDataFields.groundtruth_difficult: (
slim_example_decoder.Tensor('image/object/difficult')),
# Instance masks and classes.
fields.InputDataFields.groundtruth_instance_masks: (
slim_example_decoder.ItemHandlerCallback(
['image/segmentation/object', 'image/height', 'image/width'],
self._reshape_instance_masks)),
fields.InputDataFields.groundtruth_instance_classes: (
slim_example_decoder.Tensor('image/segmentation/object/class')),
}
def decode(self, tf_example_string_tensor):
"""Decodes serialized tensorflow example and returns a tensor dictionary.
Args:
tf_example_string_tensor: a string tensor holding a serialized tensorflow
example proto.
Returns:
A dictionary of the following tensors.
fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
containing image.
fields.InputDataFields.source_id - string tensor containing original
image id.
fields.InputDataFields.key - string tensor with unique sha256 hash key.
fields.InputDataFields.filename - string tensor with original dataset
filename.
fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
[None, 4] containing box corners.
fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
[None] containing classes for the boxes.
fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
[None] containing object mask area in square pixels.
fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
[None] indicating if the boxes enclose a crowd.
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
[None] indicating if the boxes represent `difficult` instances.
fields.InputDataFields.groundtruth_instance_masks - 3D int64 tensor of
shape [None, None, None] containing instance masks.
fields.InputDataFields.groundtruth_instance_classes - 1D int64 tensor
of shape [None] containing classes for the instance masks.
"""
serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
self.items_to_handlers)
keys = decoder.list_items()
tensors = decoder.decode(serialized_example, items=keys)
tensor_dict = dict(zip(keys, tensors))
is_crowd = fields.InputDataFields.groundtruth_is_crowd
tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
return tensor_dict
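# A hedged usage sketch (not part of the original file): decoding one serialized
# tf.train.Example string, as the unit tests later in this commit also do.
#
#   decoder = TfExampleDecoder()
#   serialized = tf.placeholder(tf.string, shape=[])
#   tensor_dict = decoder.decode(serialized)
#   image = tensor_dict[fields.InputDataFields.image]  # uint8, [None, None, 3]
#   boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]  # float32, [None, 4]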
def _reshape_instance_masks(self, keys_to_tensors):
"""Reshape instance segmentation masks.
The instance segmentation masks are reshaped to [num_instances, height,
width] and cast to boolean type to save memory.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D boolean tensor of shape [num_instances, height, width].
"""
masks = keys_to_tensors['image/segmentation/object']
if isinstance(masks, tf.SparseTensor):
masks = tf.sparse_tensor_to_dense(masks)
height = keys_to_tensors['image/height']
width = keys_to_tensors['image/width']
to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32)
return tf.cast(tf.reshape(masks, to_shape), tf.bool)
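# Sketch of the reshape performed above, using numpy and hypothetical toy sizes:
# a flattened mask vector of length num_instances * height * width becomes a
# [num_instances, height, width] boolean array.
#
#   import numpy as np
#   num_instances, height, width = 2, 3, 4
#   flat = np.random.randint(2, size=num_instances * height * width)
#   masks = flat.reshape([-1, height, width]).astype(bool)
#   assert masks.shape == (num_instances, height, width)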
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.data_decoders.tf_example_decoder."""
import numpy as np
import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
class TfExampleDecoderTest(tf.test.TestCase):
def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
with self.test_session():
if encoding_type == 'jpeg':
image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
elif encoding_type == 'png':
image_encoded = tf.image.encode_png(tf.constant(image_tensor)).eval()
else:
raise ValueError('Invalid encoding type.')
return image_encoded
def _DecodeImage(self, image_encoded, encoding_type='jpeg'):
with self.test_session():
if encoding_type == 'jpeg':
image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval()
elif encoding_type == 'png':
image_decoded = tf.image.decode_png(tf.constant(image_encoded)).eval()
else:
raise ValueError('Invalid encoding type.')
return image_decoded
def _Int64Feature(self, value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def _FloatFeature(self, value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def _BytesFeature(self, value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def testDecodeJpegImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
decoded_jpeg = self._DecodeImage(encoded_jpeg)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/source_id': self._BytesFeature('image_id'),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeImageKeyAndFilename(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/key/sha256': self._BytesFeature('abc'),
'image/filename': self._BytesFeature('filename')
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertEqual('abc', tensor_dict[fields.InputDataFields.key])
self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
def testDecodePngImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_png),
'image/format': self._BytesFeature('png'),
'image/source_id': self._BytesFeature('image_id')
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeBoundingBox(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_ymins = [0.0, 4.0]
bbox_xmins = [1.0, 5.0]
bbox_ymaxs = [2.0, 6.0]
bbox_xmaxs = [3.0, 7.0]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
get_shape().as_list()), [None, 4])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
bbox_ymaxs, bbox_xmaxs]).transpose()
self.assertAllEqual(expected_boxes,
tensor_dict[fields.InputDataFields.groundtruth_boxes])
def testDecodeObjectLabel(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/class/label': self._Int64Feature(bbox_classes),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(bbox_classes,
tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectArea(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_area = [100., 174.]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/area': self._FloatFeature(object_area),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
get_shape().as_list()), [None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(object_area,
tensor_dict[fields.InputDataFields.groundtruth_area])
def testDecodeObjectIsCrowd(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_is_crowd = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/is_crowd': self._Int64Feature(object_is_crowd),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([bool(item) for item in object_is_crowd],
tensor_dict[
fields.InputDataFields.groundtruth_is_crowd])
def testDecodeObjectDifficult(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_difficult = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/difficult': self._Int64Feature(object_difficult),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([bool(item) for item in object_difficult],
tensor_dict[
fields.InputDataFields.groundtruth_difficult])
def testDecodeInstanceSegmentation(self):
num_instances = 4
image_height = 5
image_width = 3
# Randomly generate image.
image_tensor = np.random.randint(255, size=(image_height,
image_width,
3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
# Randomly generate instance segmentation masks.
instance_segmentation = (
np.random.randint(2, size=(num_instances,
image_height,
image_width)).astype(np.int64))
# Randomly generate class labels for each instance.
instance_segmentation_classes = np.random.randint(
100, size=(num_instances)).astype(np.int64)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/height': self._Int64Feature([image_height]),
'image/width': self._Int64Feature([image_width]),
'image/segmentation/object': self._Int64Feature(
instance_segmentation.flatten()),
'image/segmentation/object/class': self._Int64Feature(
instance_segmentation_classes)})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
get_shape().as_list()), [None, None, None])
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_instance_classes].
get_shape().as_list()), [None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
instance_segmentation.astype(np.bool),
tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
self.assertAllEqual(
instance_segmentation_classes,
tensor_dict[fields.InputDataFields.groundtruth_instance_classes])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Evaluation executable for detection models.
This executable is used to evaluate DetectionModels. There are two ways of
configuring the eval job.
1) A single pipeline_pb2.TrainEvalPipelineConfig file may be specified.
In this mode, the --eval_training_data flag may be given to force the pipeline
to evaluate on training data instead.
Example usage:
./eval \
--logtostderr \
--checkpoint_dir=path/to/checkpoint_dir \
--eval_dir=path/to/eval_dir \
--pipeline_config_path=pipeline_config.pbtxt
2) Three configuration files may be provided: a model_pb2.DetectionModel
configuration file to define what type of DetectionModel is being evaluated, an
input_reader_pb2.InputReader file to specify what data the model is evaluating
and an eval_pb2.EvalConfig file to configure evaluation parameters.
Example usage:
./eval \
--logtostderr \
--checkpoint_dir=path/to/checkpoint_dir \
--eval_dir=path/to/eval_dir \
--eval_config_path=eval_config.pbtxt \
--model_config_path=model_config.pbtxt \
--input_config_path=eval_input_config.pbtxt
"""
import functools
import tensorflow as tf
from google.protobuf import text_format
from object_detection import evaluator
from object_detection.builders import input_reader_builder
from object_detection.builders import model_builder
from object_detection.protos import eval_pb2
from object_detection.protos import input_reader_pb2
from object_detection.protos import model_pb2
from object_detection.protos import pipeline_pb2
from object_detection.utils import label_map_util
tf.logging.set_verbosity(tf.logging.INFO)
flags = tf.app.flags
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
flags.DEFINE_string('checkpoint_dir', '',
'Directory containing checkpoints to evaluate, typically '
'set to `train_dir` used in the training job.')
flags.DEFINE_string('eval_dir', '',
'Directory to write eval summaries to.')
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file. If provided, other configs are ignored.')
flags.DEFINE_string('eval_config_path', '',
'Path to an eval_pb2.EvalConfig config file.')
flags.DEFINE_string('input_config_path', '',
'Path to an input_reader_pb2.InputReader config file.')
flags.DEFINE_string('model_config_path', '',
'Path to a model_pb2.DetectionModel config file.')
FLAGS = flags.FLAGS
def get_configs_from_pipeline_file():
"""Reads evaluation configuration from a pipeline_pb2.TrainEvalPipelineConfig.
Reads evaluation config from file specified by pipeline_config_path flag.
Returns:
model_config: a model_pb2.DetectionModel
eval_config: a eval_pb2.EvalConfig
input_config: a input_reader_pb2.InputReader
"""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
model_config = pipeline_config.model
eval_config = pipeline_config.eval_config
if FLAGS.eval_training_data:
  input_config = pipeline_config.train_input_reader
else:
  input_config = pipeline_config.eval_input_reader
return model_config, eval_config, input_config
def get_configs_from_multiple_files():
"""Reads evaluation configuration from multiple config files.
Reads the evaluation config from the following files:
model_config: Read from --model_config_path
eval_config: Read from --eval_config_path
input_config: Read from --input_config_path
Returns:
model_config: a model_pb2.DetectionModel
eval_config: a eval_pb2.EvalConfig
input_config: a input_reader_pb2.InputReader
"""
eval_config = eval_pb2.EvalConfig()
with tf.gfile.GFile(FLAGS.eval_config_path, 'r') as f:
text_format.Merge(f.read(), eval_config)
model_config = model_pb2.DetectionModel()
with tf.gfile.GFile(FLAGS.model_config_path, 'r') as f:
text_format.Merge(f.read(), model_config)
input_config = input_reader_pb2.InputReader()
with tf.gfile.GFile(FLAGS.input_config_path, 'r') as f:
text_format.Merge(f.read(), input_config)
return model_config, eval_config, input_config
def main(unused_argv):
assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
assert FLAGS.eval_dir, '`eval_dir` is missing.'
if FLAGS.pipeline_config_path:
model_config, eval_config, input_config = get_configs_from_pipeline_file()
else:
model_config, eval_config, input_config = get_configs_from_multiple_files()
model_fn = functools.partial(
model_builder.build,
model_config=model_config,
is_training=False)
create_input_dict_fn = functools.partial(
input_reader_builder.build,
input_config)
label_map = label_map_util.load_labelmap(input_config.label_map_path)
max_num_classes = max([item.id for item in label_map.item])
categories = label_map_util.convert_label_map_to_categories(
label_map, max_num_classes)
evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
FLAGS.checkpoint_dir, FLAGS.eval_dir)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common functions for repeatedly evaluating a checkpoint.
"""
import copy
import logging
import os
import time
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import object_detection_evaluation
from object_detection.utils import visualization_utils as vis_utils
slim = tf.contrib.slim
def write_metrics(metrics, global_step, summary_dir):
"""Write metrics to a summary directory.
Args:
metrics: A dictionary containing metric names and values.
global_step: Global step at which the metrics are computed.
summary_dir: Directory to write tensorflow summaries to.
"""
logging.info('Writing metrics to tf summary.')
summary_writer = tf.summary.FileWriter(summary_dir)
for key in sorted(metrics):
summary = tf.Summary(value=[
tf.Summary.Value(tag=key, simple_value=metrics[key]),
])
summary_writer.add_summary(summary, global_step)
logging.info('%s: %f', key, metrics[key])
summary_writer.close()
logging.info('Metrics written to tf summary.')
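# Hedged usage sketch for write_metrics (the directory and values below are
# hypothetical, for illustration only):
#
#   metrics = {'Precision/mAP@0.5IOU': 0.73}
#   write_metrics(metrics, global_step=10000, summary_dir='/tmp/eval_summaries')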
def evaluate_detection_results_pascal_voc(result_lists,
categories,
label_id_offset=0,
iou_thres=0.5,
corloc_summary=False):
"""Computes Pascal VOC detection metrics given groundtruth and detections.
This function computes Pascal VOC metrics from the detections and groundtruth
boxes encoded in result_lists and returns a dictionary of metric names to
scalar values, which callers can then write to tf summaries (see write_metrics
above) for viewing on tensorboard.
Args:
result_lists: a dictionary holding lists of groundtruth and detection
data corresponding to each image being evaluated. The following keys
are required:
'image_id': a list of string ids
'detection_boxes': a list of float32 numpy arrays of shape [N, 4]
'detection_scores': a list of float32 numpy arrays of shape [N]
'detection_classes': a list of int32 numpy arrays of shape [N]
'groundtruth_boxes': a list of float32 numpy arrays of shape [M, 4]
'groundtruth_classes': a list of int32 numpy arrays of shape [M]
and the remaining fields below are optional:
'difficult': a list of boolean arrays of shape [M] indicating the
difficulty of groundtruth boxes. Some datasets like PASCAL VOC provide
this information and it is used to remove difficult examples from eval
in order to not penalize the models on them.
Note that it is okay to have additional fields in result_lists --- they
are simply ignored.
categories: a list of dictionaries representing all possible categories.
Each dict in this list has the following keys:
'id': (required) an integer id uniquely identifying this category
'name': (required) string representing category name
e.g., 'cat', 'dog', 'pizza'
label_id_offset: an integer offset for the label space.
iou_thres: float determining the IoU threshold at which a box is considered
correct. Defaults to the standard 0.5.
corloc_summary: boolean. If True, also outputs CorLoc metrics.
Returns:
A dictionary of metric names to scalar values.
Raises:
ValueError: if the set of keys in result_lists is not a superset of the
expected list of keys. Unexpected keys are ignored.
ValueError: if the lists in result_lists have inconsistent sizes.
"""
# check for expected keys in result_lists
expected_keys = [
'detection_boxes', 'detection_scores', 'detection_classes', 'image_id'
]
expected_keys += ['groundtruth_boxes', 'groundtruth_classes']
if not set(expected_keys).issubset(set(result_lists.keys())):
raise ValueError('result_lists does not have expected key set.')
num_results = len(result_lists[expected_keys[0]])
for key in expected_keys:
if len(result_lists[key]) != num_results:
raise ValueError('Inconsistent list sizes in result_lists')
# Pascal VOC evaluator assumes foreground index starts from zero.
categories = copy.deepcopy(categories)
for idx in range(len(categories)):
categories[idx]['id'] -= label_id_offset
# num_classes (maybe encoded as categories)
num_classes = max([cat['id'] for cat in categories]) + 1
logging.info('Computing Pascal VOC metrics on results.')
if all(image_id.isdigit() for image_id in result_lists['image_id']):
image_ids = [int(image_id) for image_id in result_lists['image_id']]
else:
image_ids = range(num_results)
evaluator = object_detection_evaluation.ObjectDetectionEvaluation(
num_classes, matching_iou_threshold=iou_thres)
difficult_lists = None
if 'difficult' in result_lists and result_lists['difficult']:
difficult_lists = result_lists['difficult']
for idx, image_id in enumerate(image_ids):
difficult = None
if difficult_lists is not None and difficult_lists[idx].size:
difficult = difficult_lists[idx].astype(np.bool)
evaluator.add_single_ground_truth_image_info(
image_id, result_lists['groundtruth_boxes'][idx],
result_lists['groundtruth_classes'][idx] - label_id_offset,
difficult)
evaluator.add_single_detected_image_info(
image_id, result_lists['detection_boxes'][idx],
result_lists['detection_scores'][idx],
result_lists['detection_classes'][idx] - label_id_offset)
per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc = (
evaluator.evaluate())
metrics = {'Precision/mAP@{}IOU'.format(iou_thres): mean_ap}
category_index = label_map_util.create_category_index(categories)
for idx in range(per_class_ap.size):
if idx in category_index:
display_name = ('PerformanceByCategory/mAP@{}IOU/{}'
.format(iou_thres, category_index[idx]['name']))
metrics[display_name] = per_class_ap[idx]
if corloc_summary:
metrics['CorLoc/CorLoc@{}IOU'.format(iou_thres)] = mean_corloc
for idx in range(per_class_corloc.size):
if idx in category_index:
display_name = (
'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
iou_thres, category_index[idx]['name']))
metrics[display_name] = per_class_corloc[idx]
return metrics
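# A minimal, hypothetical result_lists sketch matching the required keys
# documented above (one image with one detection and one groundtruth box);
# all values are illustrative only.
#
#   import numpy as np
#   result_lists = {
#       'image_id': ['0'],
#       'detection_boxes': [np.array([[0.1, 0.1, 0.6, 0.6]], dtype=np.float32)],
#       'detection_scores': [np.array([0.9], dtype=np.float32)],
#       'detection_classes': [np.array([1], dtype=np.int32)],
#       'groundtruth_boxes': [np.array([[0.1, 0.1, 0.5, 0.5]], dtype=np.float32)],
#       'groundtruth_classes': [np.array([1], dtype=np.int32)],
#   }
#   categories = [{'id': 1, 'name': 'person'}]
#   metrics = evaluate_detection_results_pascal_voc(result_lists, categories)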
# TODO: Add tests.
def visualize_detection_results(result_dict,
tag,
global_step,
categories,
summary_dir='',
export_dir='',
agnostic_mode=False,
show_groundtruth=False,
min_score_thresh=.5,
max_num_predictions=20):
"""Visualizes detection results and writes visualizations to image summaries.
This function visualizes an image with its detected bounding boxes and writes
to image summaries which can be viewed on tensorboard. It optionally also
writes images to a directory. If an entry is missing from the label map, the
unknown class name is shown as "N/A" in the visualization.
Args:
result_dict: a dictionary holding groundtruth and detection
data corresponding to each image being evaluated. The following keys
are required:
'original_image': a numpy array representing the image with shape
[1, height, width, 3]
'detection_boxes': a numpy array of shape [N, 4]
'detection_scores': a numpy array of shape [N]
'detection_classes': a numpy array of shape [N]
The following keys are optional:
'groundtruth_boxes': a numpy array of shape [N, 4]
'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
Detections are assumed to be provided in decreasing order of score; for
display, we assume that scores are probabilities between 0 and 1.
tag: tensorboard tag (string) to associate with image.
global_step: global step at which the visualization are generated.
categories: a list of dictionaries representing all possible categories.
Each dict in this list has the following keys:
'id': (required) an integer id uniquely identifying this category
'name': (required) string representing category name
e.g., 'cat', 'dog', 'pizza'
'supercategory': (optional) string representing the supercategory
e.g., 'animal', 'vehicle', 'food', etc
summary_dir: the output directory to which the image summaries are written.
export_dir: the output directory to which images are written. If this is
empty (default), then images are not exported.
agnostic_mode: boolean (default: False) controlling whether to evaluate in
class-agnostic mode or not.
show_groundtruth: boolean (default: False) controlling whether to show
groundtruth boxes in addition to detected boxes
min_score_thresh: minimum score threshold for a box to be visualized
max_num_predictions: maximum number of detections to visualize
Raises:
ValueError: if result_dict does not contain the expected keys (i.e.,
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes')
"""
if not set([
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes'
]).issubset(set(result_dict.keys())):
raise ValueError('result_dict does not contain all expected keys.')
if show_groundtruth and 'groundtruth_boxes' not in result_dict:
raise ValueError('If show_groundtruth is enabled, result_dict must contain '
'groundtruth_boxes.')
logging.info('Creating detection visualizations.')
category_index = label_map_util.create_category_index(categories)
image = np.squeeze(result_dict['original_image'], axis=0)
detection_boxes = result_dict['detection_boxes']
detection_scores = result_dict['detection_scores']
detection_classes = np.int32((result_dict['detection_classes']))
detection_keypoints = result_dict.get('detection_keypoints', None)
detection_masks = result_dict.get('detection_masks', None)
# Plot groundtruth underneath detections
if show_groundtruth:
groundtruth_boxes = result_dict['groundtruth_boxes']
groundtruth_keypoints = result_dict.get('groundtruth_keypoints', None)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
groundtruth_boxes,
None,
None,
category_index,
keypoints=groundtruth_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=None)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
detection_boxes,
detection_classes,
detection_scores,
category_index,
instance_masks=detection_masks,
keypoints=detection_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=max_num_predictions,
min_score_thresh=min_score_thresh,
agnostic_mode=agnostic_mode)
if export_dir:
export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
vis_utils.save_image_array_as_png(image, export_path)
summary = tf.Summary(value=[
tf.Summary.Value(tag=tag, image=tf.Summary.Image(
encoded_image_string=vis_utils.encode_image_array_as_png_str(
image)))
])
summary_writer = tf.summary.FileWriter(summary_dir)
summary_writer.add_summary(summary, global_step)
summary_writer.close()
logging.info('Detection visualizations written to summary with tag %s.', tag)
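# Hedged sketch of the minimum result_dict expected above (shapes and values are
# hypothetical; boxes are absolute pixel coordinates since the call above sets
# use_normalized_coordinates=False):
#
#   import numpy as np
#   result_dict = {
#       'original_image': np.zeros((1, 480, 640, 3), dtype=np.uint8),
#       'detection_boxes': np.array([[100., 100., 300., 400.]], dtype=np.float32),
#       'detection_scores': np.array([0.8], dtype=np.float32),
#       'detection_classes': np.array([1], dtype=np.int32),
#   }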
# TODO: Add tests.
# TODO: Have an argument called `aggregated_processor_tensor_keys` that contains
# a whitelist of tensors used by the `aggregated_result_processor` instead of a
# blacklist. This will prevent us from inadvertently adding any evaluated
# tensors into the `results_list` data structure that are not needed by
# `aggregated_result_preprocessor`.
def run_checkpoint_once(tensor_dict,
update_op,
summary_dir,
aggregated_result_processor=None,
batch_processor=None,
checkpoint_dirs=None,
variables_to_restore=None,
restore_fn=None,
num_batches=1,
master='',
save_graph=False,
save_graph_dir='',
metric_names_to_values=None,
keys_to_exclude_from_results=()):
"""Evaluates both python metrics and tensorflow slim metrics.
Python metrics are processed in batch by the aggregated_result_processor,
while tensorflow slim metrics statistics are computed by running
metric_names_to_updates tensors and aggregated using metric_names_to_values
tensor.
Args:
tensor_dict: a dictionary holding tensors representing a batch of detections
and corresponding groundtruth annotations.
update_op: a tensorflow update op that will run for each batch along with
the tensors in tensor_dict.
summary_dir: a directory to write metrics summaries.
aggregated_result_processor: a function taking one argument:
1. result_lists: a dictionary with keys matching those in tensor_dict
and corresponding values being the list of results for each tensor
in tensor_dict. The length of each such list is num_batches.
batch_processor: a function taking five arguments:
1. tensor_dict: the same tensor_dict that is passed in as the first
argument to this function.
2. sess: a tensorflow session
3. batch_index: an integer representing the index of the batch amongst
all batches
4. counters: a dictionary holding 'success' and 'skipped' counts which the
processor may update.
5. update_op: a tensorflow update op that will run for each batch.
and returns result_dict, a dictionary of results for that batch.
By default, batch_processor is None, which defaults to running:
return sess.run(tensor_dict)
To skip an image, it suffices to return an empty dictionary in place of
result_dict.
checkpoint_dirs: list of directories to load into an EnsembleModel. If it
has only one directory, EnsembleModel will not be used -- a DetectionModel
will be instantiated directly. Not used if restore_fn is set.
variables_to_restore: None, or a dictionary mapping variable names found in
a checkpoint to model variables. The dictionary would normally be
generated by creating a tf.train.ExponentialMovingAverage object and
calling its variables_to_restore() method. Not used if restore_fn is set.
restore_fn: None, or a function that takes a tf.Session object and correctly
restores all necessary variables from the correct checkpoint file. If
None, attempts to restore from the first directory in checkpoint_dirs.
num_batches: the number of batches to use for evaluation.
master: the location of the Tensorflow session.
save_graph: whether or not the Tensorflow graph is stored as a pbtxt file.
save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
is True this must be non-empty.
metric_names_to_values: A dictionary containing metric names to tensors
which will be evaluated after processing all batches
of [tensor_dict, update_op]. If any metrics depend on statistics computed
during each batch ensure that `update_op` tensor has a control dependency
on the update ops that compute the statistics.
keys_to_exclude_from_results: keys in tensor_dict that will be excluded
from results_list. Note that the tensors corresponding to these keys will
still be evaluated for each batch, but won't be added to results_list.
Raises:
ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least
one element.
ValueError: if save_graph is True and save_graph_dir is not defined.
"""
if save_graph and not save_graph_dir:
raise ValueError('`save_graph_dir` must be defined.')
sess = tf.Session(master, graph=tf.get_default_graph())
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
if restore_fn:
restore_fn(sess)
else:
if not checkpoint_dirs:
raise ValueError('`checkpoint_dirs` must have at least one entry.')
checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0])
saver = tf.train.Saver(variables_to_restore)
saver.restore(sess, checkpoint_file)
if save_graph:
tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt')
valid_keys = list(set(tensor_dict.keys()) - set(keys_to_exclude_from_results))
result_lists = {key: [] for key in valid_keys}
counters = {'skipped': 0, 'success': 0}
other_metrics = None
with tf.contrib.slim.queues.QueueRunners(sess):
try:
for batch in range(int(num_batches)):
if (batch + 1) % 100 == 0:
logging.info('Running eval ops batch %d/%d', batch + 1, num_batches)
if not batch_processor:
try:
(result_dict, _) = sess.run([tensor_dict, update_op])
counters['success'] += 1
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
counters['skipped'] += 1
result_dict = {}
else:
result_dict = batch_processor(
tensor_dict, sess, batch, counters, update_op)
for key in result_dict:
if key in valid_keys:
result_lists[key].append(result_dict[key])
if metric_names_to_values is not None:
other_metrics = sess.run(metric_names_to_values)
logging.info('Running eval batches done.')
except tf.errors.OutOfRangeError:
logging.info('Done evaluating -- epoch limit reached')
finally:
# When done, ask the threads to stop.
metrics = aggregated_result_processor(result_lists)
if other_metrics is not None:
metrics.update(other_metrics)
global_step = tf.train.global_step(sess, slim.get_global_step())
write_metrics(metrics, global_step, summary_dir)
logging.info('# success: %d', counters['success'])
logging.info('# skipped: %d', counters['skipped'])
sess.close()
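# Hedged sketch of a custom batch_processor matching the signature used in the
# call above (the function name is hypothetical); returning an empty dict skips
# an image.
#
#   def simple_batch_processor(tensor_dict, sess, batch_index, counters, update_op):
#       try:
#           result_dict, _ = sess.run([tensor_dict, update_op])
#           counters['success'] += 1
#           return result_dict
#       except tf.errors.InvalidArgumentError:
#           counters['skipped'] += 1
#           return {}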
# TODO: Add tests.
def repeated_checkpoint_run(tensor_dict,
update_op,
summary_dir,
aggregated_result_processor=None,
batch_processor=None,
checkpoint_dirs=None,
variables_to_restore=None,
restore_fn=None,
num_batches=1,
eval_interval_secs=120,
max_number_of_evaluations=None,
master='',
save_graph=False,
save_graph_dir='',
metric_names_to_values=None,
keys_to_exclude_from_results=()):
"""Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.
This function repeatedly loads a checkpoint and evaluates a desired
set of tensors (provided by tensor_dict) and hands the resulting numpy
arrays to a function result_processor which can be used to further
process/save/visualize the results.
Args:
tensor_dict: a dictionary holding tensors representing a batch of detections
and corresponding groundtruth annotations.
update_op: a tensorflow update op that will run for each batch along with
the tensors in tensor_dict.
summary_dir: a directory to write metrics summaries.
aggregated_result_processor: a function taking one argument:
1. result_lists: a dictionary with keys matching those in tensor_dict
and corresponding values being the list of results for each tensor
in tensor_dict. The length of each such list is num_batches.
batch_processor: a function taking four arguments:
1. tensor_dict: the same tensor_dict that is passed in as the first
argument to this function.
2. sess: a tensorflow session
3. batch_index: an integer representing the index of the batch amongst
all batches
4. update_op: a tensorflow update op that will run for each batch.
and returns result_dict, a dictionary of results for that batch.
By default, batch_processor is None, in which case the default behavior is:
(result_dict, _) = sess.run([tensor_dict, update_op])
checkpoint_dirs: list of directories to load into a DetectionModel or an
EnsembleModel if restore_fn isn't set. Also used to determine when to run
next evaluation. Must have at least one element.
variables_to_restore: None, or a dictionary mapping variable names found in
a checkpoint to model variables. The dictionary would normally be
generated by creating a tf.train.ExponentialMovingAverage object and
calling its variables_to_restore() method. Not used if restore_fn is set.
restore_fn: None, or a function that takes a tf.Session object and correctly
restores all necessary variables from the correct checkpoint file. If None,
attempts to restore from the first directory in checkpoint_dirs.
num_batches: the number of batches to use for evaluation.
eval_interval_secs: the number of seconds between each evaluation run.
max_number_of_evaluations: the max number of iterations of the evaluation.
If the value is left as None the evaluation continues indefinitely.
master: the location of the Tensorflow session.
save_graph: whether or not the Tensorflow graph is saved as a pbtxt file.
save_graph_dir: where to save the Tensorflow graph on disk. If save_graph
is True this must be non-empty.
metric_names_to_values: A dictionary containing metric names to tensors
which will be evaluated after processing all batches
of [tensor_dict, update_op]. If any metrics depend on statistics computed
during each batch ensure that `update_op` tensor has a control dependency
on the update ops that compute the statistics.
keys_to_exclude_from_results: keys in tensor_dict that will be excluded
from results_list. Note that the tensors corresponding to these keys will
still be evaluated for each batch, but won't be added to results_list.
Raises:
ValueError: if max_number_of_evaluations is neither None nor a positive number.
ValueError: if checkpoint_dirs doesn't have at least one element.
"""
if max_number_of_evaluations and max_number_of_evaluations <= 0:
raise ValueError(
'`max_number_of_evaluations` must be either None or a positive number.')
if not checkpoint_dirs:
raise ValueError('`checkpoint_dirs` must have at least one entry.')
last_evaluated_model_path = None
number_of_evaluations = 0
while True:
start = time.time()
logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
time.gmtime()))
model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
if not model_path:
logging.info('No model found in %s. Will try again in %d seconds',
checkpoint_dirs[0], eval_interval_secs)
elif model_path == last_evaluated_model_path:
logging.info('Found already evaluated checkpoint. Will try again in %d '
'seconds', eval_interval_secs)
else:
last_evaluated_model_path = model_path
run_checkpoint_once(tensor_dict, update_op, summary_dir,
aggregated_result_processor,
batch_processor, checkpoint_dirs,
variables_to_restore, restore_fn, num_batches, master,
save_graph, save_graph_dir, metric_names_to_values,
keys_to_exclude_from_results)
number_of_evaluations += 1
if (max_number_of_evaluations and
number_of_evaluations >= max_number_of_evaluations):
logging.info('Finished evaluation!')
break
time_to_next_eval = start + eval_interval_secs - time.time()
if time_to_next_eval > 0:
time.sleep(time_to_next_eval)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Detection model evaluator.
This file provides a generic evaluation method that can be used to evaluate a
DetectionModel.
"""
import logging
import tensorflow as tf
from object_detection import eval_util
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import prefetcher
from object_detection.core import standard_fields as fields
from object_detection.utils import ops
slim = tf.contrib.slim
EVAL_METRICS_FN_DICT = {
'pascal_voc_metrics': eval_util.evaluate_detection_results_pascal_voc
}
def _extract_prediction_tensors(model,
create_input_dict_fn,
ignore_groundtruth=False):
"""Restores the model in a tensorflow session.
Args:
model: model to perform predictions with.
create_input_dict_fn: function to create input tensor dictionaries.
ignore_groundtruth: whether groundtruth should be ignored.
Returns:
tensor_dict: A tensor dictionary with evaluations.
"""
input_dict = create_input_dict_fn()
prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
input_dict = prefetch_queue.dequeue()
original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
preprocessed_image = model.preprocess(tf.to_float(original_image))
prediction_dict = model.predict(preprocessed_image)
detections = model.postprocess(prediction_dict)
original_image_shape = tf.shape(original_image)
absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
original_image_shape[1], original_image_shape[2])
label_id_offset = 1
tensor_dict = {
'original_image': original_image,
'image_id': input_dict[fields.InputDataFields.source_id],
'detection_boxes': absolute_detection_boxlist.get(),
'detection_scores': tf.squeeze(detections['detection_scores'], axis=0),
'detection_classes': (
tf.squeeze(detections['detection_classes'], axis=0) +
label_id_offset),
}
if 'detection_masks' in detections:
detection_masks = tf.squeeze(detections['detection_masks'],
axis=0)
detection_boxes = tf.squeeze(detections['detection_boxes'],
axis=0)
# TODO: This should be done in model's postprocess function ideally.
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks,
detection_boxes,
original_image_shape[1], original_image_shape[2])
detection_masks_reframed = tf.to_float(tf.greater(detection_masks_reframed,
0.5))
tensor_dict['detection_masks'] = detection_masks_reframed
# load groundtruth fields into tensor_dict
if not ignore_groundtruth:
normalized_gt_boxlist = box_list.BoxList(
input_dict[fields.InputDataFields.groundtruth_boxes])
gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
tf.shape(original_image)[1],
tf.shape(original_image)[2])
groundtruth_boxes = gt_boxlist.get()
groundtruth_classes = input_dict[fields.InputDataFields.groundtruth_classes]
tensor_dict['groundtruth_boxes'] = groundtruth_boxes
tensor_dict['groundtruth_classes'] = groundtruth_classes
tensor_dict['area'] = input_dict[fields.InputDataFields.groundtruth_area]
tensor_dict['is_crowd'] = input_dict[
fields.InputDataFields.groundtruth_is_crowd]
tensor_dict['difficult'] = input_dict[
fields.InputDataFields.groundtruth_difficult]
if 'detection_masks' in tensor_dict:
tensor_dict['groundtruth_instance_masks'] = input_dict[
fields.InputDataFields.groundtruth_instance_masks]
return tensor_dict
def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
checkpoint_dir, eval_dir):
"""Evaluation function for detection models.
Args:
create_input_dict_fn: a function to create a tensor input dictionary.
create_model_fn: a function that creates a DetectionModel.
eval_config: a eval_pb2.EvalConfig protobuf.
categories: a list of category dictionaries. Each dict in the list should
have an integer 'id' field and string 'name' field.
checkpoint_dir: directory to load the checkpoints to evaluate from.
eval_dir: directory to write evaluation metrics summary to.
"""
model = create_model_fn()
if eval_config.ignore_groundtruth and not eval_config.export_path:
logging.fatal('If ignore_groundtruth=True then an export_path is '
'required. Aborting!!!')
tensor_dict = _extract_prediction_tensors(
model=model,
create_input_dict_fn=create_input_dict_fn,
ignore_groundtruth=eval_config.ignore_groundtruth)
def _process_batch(tensor_dict, sess, batch_index, counters, update_op):
"""Evaluates tensors in tensor_dict, visualizing the first K examples.
This function calls sess.run on tensor_dict, evaluating the original_image
tensor only on the first K examples and visualizing detections overlaid
on this original_image.
Args:
tensor_dict: a dictionary of tensors
sess: tensorflow session
batch_index: the index of the batch amongst all batches in the run.
counters: a dictionary holding 'success' and 'skipped' fields which can
be updated to keep track of number of successful and failed runs,
respectively. If these fields are not updated, then the success/skipped
counter values shown at the end of evaluation will be incorrect.
update_op: An update op that has to be run along with output tensors. For
example this could be an op to compute statistics for slim metrics.
Returns:
result_dict: a dictionary of numpy arrays
"""
if batch_index >= eval_config.num_visualizations:
if 'original_image' in tensor_dict:
tensor_dict = {k: v for (k, v) in tensor_dict.items()
if k != 'original_image'}
try:
(result_dict, _) = sess.run([tensor_dict, update_op])
counters['success'] += 1
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
counters['skipped'] += 1
return {}
global_step = tf.train.global_step(sess, slim.get_global_step())
if batch_index < eval_config.num_visualizations:
tag = 'image-{}'.format(batch_index)
eval_util.visualize_detection_results(
result_dict, tag, global_step, categories=categories,
summary_dir=eval_dir,
export_dir=eval_config.visualization_export_dir,
show_groundtruth=eval_config.visualization_export_dir)
return result_dict
def _process_aggregated_results(result_lists):
eval_metric_fn_key = eval_config.metrics_set
if eval_metric_fn_key not in EVAL_METRICS_FN_DICT:
raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
return EVAL_METRICS_FN_DICT[eval_metric_fn_key](result_lists,
categories=categories)
variables_to_restore = tf.global_variables()
global_step = slim.get_or_create_global_step()
variables_to_restore.append(global_step)
if eval_config.use_moving_averages:
variable_averages = tf.train.ExponentialMovingAverage(0.0)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
def _restore_latest_checkpoint(sess):
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
saver.restore(sess, latest_checkpoint)
eval_util.repeated_checkpoint_run(
tensor_dict=tensor_dict,
update_op=tf.no_op(),
summary_dir=eval_dir,
aggregated_result_processor=_process_aggregated_results,
batch_processor=_process_batch,
checkpoint_dirs=[checkpoint_dir],
variables_to_restore=None,
restore_fn=_restore_latest_checkpoint,
num_batches=eval_config.num_examples,
eval_interval_secs=eval_config.eval_interval_secs,
max_number_of_evaluations=(
1 if eval_config.ignore_groundtruth else
eval_config.max_evals if eval_config.max_evals else
None),
master=eval_config.eval_master,
save_graph=eval_config.save_graph,
save_graph_dir=(eval_dir if eval_config.save_graph else ''))
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Tool to export an object detection model for inference.
Prepares an object detection tensorflow graph for inference using model
configuration and an optional trained checkpoint. Outputs either an inference
graph or a SavedModel (https://tensorflow.github.io/serving/serving_basic.html).
The inference graph contains one of three input nodes depending on the
user-specified option.
* `image_tensor`: Accepts a uint8 4-D tensor of shape [1, None, None, 3]
* `encoded_image_string_tensor`: Accepts a scalar string tensor containing an
encoded PNG or JPEG image.
* `tf_example`: Accepts a serialized TFExample proto. The batch size in this
case is always 1.
and the following output nodes returned by the model.postprocess(..):
* `num_detections`: Outputs float32 tensors of the form [batch]
that specifies the number of valid boxes per image in the batch.
* `detection_boxes`: Outputs float32 tensors of the form
[batch, num_boxes, 4] containing detected boxes.
* `detection_scores`: Outputs float32 tensors of the form
[batch, num_boxes] containing class scores for the detections.
* `detection_classes`: Outputs float32 tensors of the form
[batch, num_boxes] containing classes for the detections.
* `detection_masks`: Outputs float32 tensors of the form
[batch, num_boxes, mask_height, mask_width] containing predicted instance
masks for each box if it is present in the dictionary of postprocessed
tensors returned by the model.
Note that currently `batch` is always 1, but we will support `batch` > 1 in
the future.
Optionally, one can freeze the graph by converting the weights in the provided
checkpoint into graph constants, thereby eliminating the need to use a
checkpoint file during inference.
Note that this tool uses `use_moving_averages` from eval_config to decide
which weights to freeze.
Example Usage:
--------------
python export_inference_graph.py \
--input_type image_tensor \
--pipeline_config_path path/to/ssd_inception_v2.config \
--checkpoint_path path/to/model-ckpt \
--inference_graph_path path/to/inference_graph.pb
"""
import tensorflow as tf
from google.protobuf import text_format
from object_detection import exporter
from object_detection.protos import pipeline_pb2
slim = tf.contrib.slim
flags = tf.app.flags
flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
'one of [`image_tensor`, `encoded_image_string_tensor`, '
'`tf_example`]')
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file.')
flags.DEFINE_string('checkpoint_path', '', 'Optional path to checkpoint file. '
'If provided, bakes the weights from the checkpoint into '
'the graph.')
flags.DEFINE_string('inference_graph_path', '', 'Path to write the output '
'inference graph.')
flags.DEFINE_bool('export_as_saved_model', False, 'Whether the exported graph '
'should be saved as a SavedModel')
FLAGS = flags.FLAGS
def main(_):
assert FLAGS.pipeline_config_path, 'TrainEvalPipelineConfig missing.'
assert FLAGS.inference_graph_path, 'Inference graph path missing.'
assert FLAGS.input_type, 'Input type missing.'
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
exporter.export_inference_graph(FLAGS.input_type, pipeline_config,
FLAGS.checkpoint_path,
FLAGS.inference_graph_path,
FLAGS.export_as_saved_model)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to export object detection inference graph."""
import logging
import os
import tensorflow as tf
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.client import session
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import importer
from tensorflow.python.platform import gfile
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.training import saver as saver_lib
from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
slim = tf.contrib.slim
# TODO: Replace with freeze_graph.freeze_graph_with_def_protos when
# newer version of Tensorflow becomes more common.
def freeze_graph_with_def_protos(
input_graph_def,
input_saver_def,
input_checkpoint,
output_node_names,
restore_op_name,
filename_tensor_name,
clear_devices,
initializer_nodes,
variable_names_blacklist=''):
"""Converts all variables in a graph and checkpoint into constants."""
del restore_op_name, filename_tensor_name # Unused by updated loading code.
# 'input_checkpoint' may be a prefix if we're using Saver V2 format
if not saver_lib.checkpoint_exists(input_checkpoint):
raise ValueError(
'Input checkpoint "' + input_checkpoint + '" does not exist!')
if not output_node_names:
raise ValueError(
'You must supply the name of a node to --output_node_names.')
# Remove all the explicit device specifications for this node. This helps to
# make the graph more portable.
if clear_devices:
for node in input_graph_def.node:
node.device = ''
_ = importer.import_graph_def(input_graph_def, name='')
with session.Session() as sess:
if input_saver_def:
saver = saver_lib.Saver(saver_def=input_saver_def)
saver.restore(sess, input_checkpoint)
else:
var_list = {}
reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint)
var_to_shape_map = reader.get_variable_to_shape_map()
for key in var_to_shape_map:
try:
tensor = sess.graph.get_tensor_by_name(key + ':0')
except KeyError:
# This tensor doesn't exist in the graph (for example it's
# 'global_step' or a similar housekeeping element) so skip it.
continue
var_list[key] = tensor
saver = saver_lib.Saver(var_list=var_list)
saver.restore(sess, input_checkpoint)
if initializer_nodes:
sess.run(initializer_nodes)
variable_names_blacklist = (variable_names_blacklist.split(',') if
variable_names_blacklist else None)
output_graph_def = graph_util.convert_variables_to_constants(
sess,
input_graph_def,
output_node_names.split(','),
variable_names_blacklist=variable_names_blacklist)
return output_graph_def
def get_frozen_graph_def(inference_graph_def, use_moving_averages,
input_checkpoint, output_node_names):
"""Freezes all variables in a graph definition."""
saver = None
if use_moving_averages:
variable_averages = tf.train.ExponentialMovingAverage(0.0)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
else:
saver = tf.train.Saver()
frozen_graph_def = freeze_graph_with_def_protos(
input_graph_def=inference_graph_def,
input_saver_def=saver.as_saver_def(),
input_checkpoint=input_checkpoint,
output_node_names=output_node_names,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
clear_devices=True,
initializer_nodes='')
return frozen_graph_def
# TODO: Support batch tf example inputs.
def _tf_example_input_placeholder():
tf_example_placeholder = tf.placeholder(
tf.string, shape=[], name='tf_example')
tensor_dict = tf_example_decoder.TfExampleDecoder().decode(
tf_example_placeholder)
image = tensor_dict[fields.InputDataFields.image]
return tf.expand_dims(image, axis=0)
def _image_tensor_input_placeholder():
return tf.placeholder(dtype=tf.uint8,
shape=(1, None, None, 3),
name='image_tensor')
def _encoded_image_string_tensor_input_placeholder():
image_str = tf.placeholder(dtype=tf.string,
shape=[],
name='encoded_image_string_tensor')
image_tensor = tf.image.decode_image(image_str, channels=3)
image_tensor.set_shape((None, None, 3))
return tf.expand_dims(image_tensor, axis=0)
input_placeholder_fn_map = {
'image_tensor': _image_tensor_input_placeholder,
'encoded_image_string_tensor':
_encoded_image_string_tensor_input_placeholder,
'tf_example': _tf_example_input_placeholder,
}
def _add_output_tensor_nodes(postprocessed_tensors):
"""Adds output nodes for detection boxes and scores.
Adds the following nodes for output tensors -
* num_detections: float32 tensor of shape [batch_size].
* detection_boxes: float32 tensor of shape [batch_size, num_boxes, 4]
containing detected boxes.
* detection_scores: float32 tensor of shape [batch_size, num_boxes]
containing scores for the detected boxes.
* detection_classes: float32 tensor of shape [batch_size, num_boxes]
containing class predictions for the detected boxes.
* detection_masks: (Optional) float32 tensor of shape
[batch_size, num_boxes, mask_height, mask_width] containing masks for each
detection box.
Args:
postprocessed_tensors: a dictionary containing the following fields
'detection_boxes': [batch, max_detections, 4]
'detection_scores': [batch, max_detections]
'detection_classes': [batch, max_detections]
'detection_masks': [batch, max_detections, mask_height, mask_width]
(optional).
'num_detections': [batch]
Returns:
A tensor dict containing the added output tensor nodes.
"""
label_id_offset = 1
boxes = postprocessed_tensors.get('detection_boxes')
scores = postprocessed_tensors.get('detection_scores')
classes = postprocessed_tensors.get('detection_classes') + label_id_offset
masks = postprocessed_tensors.get('detection_masks')
num_detections = postprocessed_tensors.get('num_detections')
outputs = {}
outputs['detection_boxes'] = tf.identity(boxes, name='detection_boxes')
outputs['detection_scores'] = tf.identity(scores, name='detection_scores')
outputs['detection_classes'] = tf.identity(classes, name='detection_classes')
outputs['num_detections'] = tf.identity(num_detections, name='num_detections')
if masks is not None:
outputs['detection_masks'] = tf.identity(masks, name='detection_masks')
return outputs
def _write_inference_graph(inference_graph_path,
checkpoint_path=None,
use_moving_averages=False,
output_node_names=(
'num_detections,detection_scores,'
'detection_boxes,detection_classes')):
"""Writes inference graph to disk with the option to bake in weights.
If checkpoint_path is not None bakes the weights into the graph thereby
eliminating the need for checkpoint files during inference. If the model
was trained with moving averages, setting use_moving_averages to true
restores the moving averages, otherwise the original set of variables
is restored.
Args:
inference_graph_path: Path to write inference graph.
checkpoint_path: Optional path to the checkpoint file.
use_moving_averages: Whether to export the original or the moving averages
of the trainable variables from the checkpoint.
output_node_names: Output tensor names, defaults are: num_detections,
detection_scores, detection_boxes, detection_classes.
"""
inference_graph_def = tf.get_default_graph().as_graph_def()
if checkpoint_path:
output_graph_def = get_frozen_graph_def(
inference_graph_def=inference_graph_def,
use_moving_averages=use_moving_averages,
input_checkpoint=checkpoint_path,
output_node_names=output_node_names,
)
with gfile.GFile(inference_graph_path, 'wb') as f:
f.write(output_graph_def.SerializeToString())
logging.info('%d ops in the final graph.', len(output_graph_def.node))
return
tf.train.write_graph(inference_graph_def,
os.path.dirname(inference_graph_path),
os.path.basename(inference_graph_path),
as_text=False)
def _write_saved_model(inference_graph_path, inputs, outputs,
checkpoint_path=None, use_moving_averages=False):
"""Writes SavedModel to disk.
If checkpoint_path is not None bakes the weights into the graph thereby
eliminating the need for checkpoint files during inference. If the model
was trained with moving averages, setting use_moving_averages to true
restores the moving averages, otherwise the original set of variables
is restored.
Args:
inference_graph_path: Path to write inference graph.
inputs: The input image tensor to use for detection.
outputs: A tensor dictionary containing the outputs of a DetectionModel.
checkpoint_path: Optional path to the checkpoint file.
use_moving_averages: Whether to export the original or the moving averages
of the trainable variables from the checkpoint.
"""
inference_graph_def = tf.get_default_graph().as_graph_def()
checkpoint_graph_def = None
if checkpoint_path:
output_node_names = ','.join(outputs.keys())
checkpoint_graph_def = get_frozen_graph_def(
inference_graph_def=inference_graph_def,
use_moving_averages=use_moving_averages,
input_checkpoint=checkpoint_path,
output_node_names=output_node_names
)
with tf.Graph().as_default():
with session.Session() as sess:
tf.import_graph_def(checkpoint_graph_def)
builder = tf.saved_model.builder.SavedModelBuilder(inference_graph_path)
tensor_info_inputs = {
'inputs': tf.saved_model.utils.build_tensor_info(inputs)}
tensor_info_outputs = {}
for k, v in outputs.items():
tensor_info_outputs[k] = tf.saved_model.utils.build_tensor_info(v)
detection_signature = (
tf.saved_model.signature_def_utils.build_signature_def(
inputs=tensor_info_inputs,
outputs=tensor_info_outputs,
method_name=signature_constants.PREDICT_METHOD_NAME))
builder.add_meta_graph_and_variables(
sess, [tf.saved_model.tag_constants.SERVING],
signature_def_map={
signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
detection_signature,
},
)
builder.save()
def _export_inference_graph(input_type,
detection_model,
use_moving_averages,
checkpoint_path,
inference_graph_path,
export_as_saved_model=False):
"""Export helper."""
if input_type not in input_placeholder_fn_map:
raise ValueError('Unknown input type: {}'.format(input_type))
inputs = tf.to_float(input_placeholder_fn_map[input_type]())
preprocessed_inputs = detection_model.preprocess(inputs)
output_tensors = detection_model.predict(preprocessed_inputs)
postprocessed_tensors = detection_model.postprocess(output_tensors)
outputs = _add_output_tensor_nodes(postprocessed_tensors)
out_node_names = list(outputs.keys())
if export_as_saved_model:
_write_saved_model(inference_graph_path, inputs, outputs, checkpoint_path,
use_moving_averages)
else:
_write_inference_graph(inference_graph_path, checkpoint_path,
use_moving_averages,
output_node_names=','.join(out_node_names))
def export_inference_graph(input_type, pipeline_config, checkpoint_path,
inference_graph_path, export_as_saved_model=False):
"""Exports inference graph for the model specified in the pipeline config.
Args:
input_type: Type of input for the graph. Can be one of [`image_tensor`,
`encoded_image_string_tensor`, `tf_example`].
pipeline_config: pipeline_pb2.TrainEvalPipelineConfig proto.
checkpoint_path: Path to the checkpoint file to freeze.
inference_graph_path: Path to write inference graph to.
export_as_saved_model: If the model should be exported as a SavedModel. If
false, it is saved as an inference graph.
"""
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
_export_inference_graph(input_type, detection_model,
pipeline_config.eval_config.use_moving_averages,
checkpoint_path, inference_graph_path,
export_as_saved_model)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.export_inference_graph."""
import os
import numpy as np
import six
import tensorflow as tf
from object_detection import exporter
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.protos import pipeline_pb2
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
from unittest import mock # pylint: disable=g-import-not-at-top
class FakeModel(model.DetectionModel):
def __init__(self, add_detection_masks=False):
self._add_detection_masks = add_detection_masks
def preprocess(self, inputs):
return tf.identity(inputs)
def predict(self, preprocessed_inputs):
return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)}
def postprocess(self, prediction_dict):
with tf.control_dependencies(prediction_dict.values()):
postprocessed_tensors = {
'detection_boxes': tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]], tf.float32),
'detection_scores': tf.constant([[0.7, 0.6]], tf.float32),
'detection_classes': tf.constant([[0, 1]], tf.float32),
'num_detections': tf.constant([2], tf.float32)
}
if self._add_detection_masks:
postprocessed_tensors['detection_masks'] = tf.constant(
np.arange(32).reshape([2, 4, 4]), tf.float32)
return postprocessed_tensors
def restore_fn(self, checkpoint_path, from_detection_checkpoint):
pass
def loss(self, prediction_dict):
pass
class ExportInferenceGraphTest(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self, checkpoint_path,
use_moving_averages):
g = tf.Graph()
with g.as_default():
mock_model = FakeModel()
preprocessed_inputs = mock_model.preprocess(
tf.ones([1, 3, 4, 3], tf.float32))
predictions = mock_model.predict(preprocessed_inputs)
mock_model.postprocess(predictions)
if use_moving_averages:
tf.train.ExponentialMovingAverage(0.0).apply()
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init)
saver.save(sess, checkpoint_path)
def _load_inference_graph(self, inference_graph_path):
od_graph = tf.Graph()
with od_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(inference_graph_path) as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
return od_graph
def _create_tf_example(self, image_array):
with self.test_session():
encoded_image = tf.image.encode_jpeg(tf.constant(image_array)).eval()
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': _bytes_feature(encoded_image),
'image/format': _bytes_feature('jpg'),
'image/source_id': _bytes_feature('image_id')
})).SerializeToString()
return example
def test_export_graph_with_image_tensor_input(self):
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pbtxt')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=None,
inference_graph_path=inference_graph_path)
def test_export_graph_with_tf_example_input(self):
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pbtxt')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='tf_example',
pipeline_config=pipeline_config,
checkpoint_path=None,
inference_graph_path=inference_graph_path)
def test_export_graph_with_encoded_image_string_input(self):
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pbtxt')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='encoded_image_string_tensor',
pipeline_config=pipeline_config,
checkpoint_path=None,
inference_graph_path=inference_graph_path)
def test_export_frozen_graph(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
def test_export_frozen_graph_with_moving_averages(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=True)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = True
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
def test_export_model_with_all_output_nodes(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph):
inference_graph.get_tensor_by_name('image_tensor:0')
inference_graph.get_tensor_by_name('detection_boxes:0')
inference_graph.get_tensor_by_name('detection_scores:0')
inference_graph.get_tensor_by_name('detection_classes:0')
inference_graph.get_tensor_by_name('detection_masks:0')
inference_graph.get_tensor_by_name('num_detections:0')
def test_export_model_with_detection_only_nodes(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=False)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph):
inference_graph.get_tensor_by_name('image_tensor:0')
inference_graph.get_tensor_by_name('detection_boxes:0')
inference_graph.get_tensor_by_name('detection_scores:0')
inference_graph.get_tensor_by_name('detection_classes:0')
inference_graph.get_tensor_by_name('num_detections:0')
with self.assertRaises(KeyError):
inference_graph.get_tensor_by_name('detection_masks:0')
def test_export_and_run_inference_with_image_tensor(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph) as sess:
image_tensor = inference_graph.get_tensor_by_name('image_tensor:0')
boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
scores = inference_graph.get_tensor_by_name('detection_scores:0')
classes = inference_graph.get_tensor_by_name('detection_classes:0')
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
(boxes, scores, classes, masks, num_detections) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={image_tensor: np.ones((1, 4, 4, 3)).astype(np.uint8)})
self.assertAllClose(boxes, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores, [[0.7, 0.6]])
self.assertAllClose(classes, [[1, 2]])
self.assertAllClose(masks, np.arange(32).reshape([2, 4, 4]))
self.assertAllClose(num_detections, [2])
def _create_encoded_image_string(self, image_array_np, encoding_format):
od_graph = tf.Graph()
with od_graph.as_default():
if encoding_format == 'jpg':
encoded_string = tf.image.encode_jpeg(image_array_np)
elif encoding_format == 'png':
encoded_string = tf.image.encode_png(image_array_np)
else:
raise ValueError('Supports only the following formats: `jpg`, `png`')
with self.test_session(graph=od_graph):
return encoded_string.eval()
def test_export_and_run_inference_with_encoded_image_string_tensor(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='encoded_image_string_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
jpg_image_str = self._create_encoded_image_string(
np.ones((4, 4, 3)).astype(np.uint8), 'jpg')
png_image_str = self._create_encoded_image_string(
np.ones((4, 4, 3)).astype(np.uint8), 'png')
with self.test_session(graph=inference_graph) as sess:
image_str_tensor = inference_graph.get_tensor_by_name(
'encoded_image_string_tensor:0')
boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
scores = inference_graph.get_tensor_by_name('detection_scores:0')
classes = inference_graph.get_tensor_by_name('detection_classes:0')
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
for image_str in [jpg_image_str, png_image_str]:
(boxes_np, scores_np, classes_np, masks_np,
num_detections_np) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={image_str_tensor: image_str})
self.assertAllClose(boxes_np, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores_np, [[0.7, 0.6]])
self.assertAllClose(classes_np, [[1, 2]])
self.assertAllClose(masks_np, np.arange(32).reshape([2, 4, 4]))
self.assertAllClose(num_detections_np, [2])
def test_export_and_run_inference_with_tf_example(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='tf_example',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph) as sess:
tf_example = inference_graph.get_tensor_by_name('tf_example:0')
boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
scores = inference_graph.get_tensor_by_name('detection_scores:0')
classes = inference_graph.get_tensor_by_name('detection_classes:0')
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
(boxes, scores, classes, masks, num_detections) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={tf_example: self._create_tf_example(
np.ones((4, 4, 3)).astype(np.uint8))})
self.assertAllClose(boxes, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores, [[0.7, 0.6]])
self.assertAllClose(classes, [[1, 2]])
self.assertAllClose(masks, np.arange(32).reshape([2, 4, 4]))
self.assertAllClose(num_detections, [2])
def test_export_saved_model_and_run_inference(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'saved_model')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='tf_example',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path,
export_as_saved_model=True)
with tf.Graph().as_default() as od_graph:
with self.test_session(graph=od_graph) as sess:
tf.saved_model.loader.load(
sess, [tf.saved_model.tag_constants.SERVING], inference_graph_path)
tf_example = od_graph.get_tensor_by_name('import/tf_example:0')
boxes = od_graph.get_tensor_by_name('import/detection_boxes:0')
scores = od_graph.get_tensor_by_name('import/detection_scores:0')
classes = od_graph.get_tensor_by_name('import/detection_classes:0')
masks = od_graph.get_tensor_by_name('import/detection_masks:0')
num_detections = od_graph.get_tensor_by_name('import/num_detections:0')
(boxes, scores, classes, masks, num_detections) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={tf_example: self._create_tf_example(
np.ones((4, 4, 3)).astype(np.uint8))})
self.assertAllClose(boxes, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores, [[0.7, 0.6]])
self.assertAllClose(classes, [[1, 2]])
self.assertAllClose(masks, np.arange(32).reshape([2, 4, 4]))
self.assertAllClose(num_detections, [2])
if __name__ == '__main__':
tf.test.main()
# Configuring the Object Detection Training Pipeline
## Overview
The Tensorflow Object Detection API uses protobuf files to configure the
training and evaluation process. The schema for the training pipeline can be
found in object_detection/protos/pipeline.proto. At a high level, the config
file is split into 5 parts:
1. The `model` configuration. This defines what type of model will be trained
(i.e. meta-architecture, feature extractor).
2. The `train_config`, which determines what parameters should be used to train
the model (i.e. SGD parameters, input preprocessing and feature extractor
initialization values).
3. The `eval_config`, which determines what set of metrics will be reported for
evaluation (currently we only support the PASCAL VOC metrics).
4. The `train_input_config`, which defines what dataset the model should be
trained on.
5. The `eval_input_config`, which defines what dataset the model will be
evaluated on. Typically this should be different from the training input
dataset.
A skeleton configuration file is shown below:
```
model {
(... Add model config here...)
}
train_config : {
(... Add train_config here...)
}
train_input_reader: {
(... Add train_input configuration here...)
}
eval_config: {
}
eval_input_reader: {
(... Add eval_input configuration here...)
}
```
## Picking Model Parameters
There are a large number of model parameters to configure. The best settings
will depend on your given application. Faster R-CNN models are better suited to
cases where high accuracy is desired and latency is of lower priority.
Conversely, if processing time is the most important factor, SSD models are
recommended. Read [our paper](https://arxiv.org/abs/1611.10012) for a more
detailed discussion on the speed vs accuracy tradeoff.
To help new users get started, sample model configurations have been provided
in the `object_detection/samples/configs` folder. The contents of these
configuration files can be pasted into the `model` field of the skeleton
configuration. Users should note that the `num_classes` field should be changed
to a value suited for the dataset the user is training on.
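For example, after pasting one of the sample SSD configurations, the top of the
`model` block might look like the following (the class count shown is purely
illustrative; set it to the number of classes in your own label map):

```
model {
  ssd {
    num_classes: 37  # Change this to match your dataset.
    (... rest of the pasted sample model config ...)
  }
}
```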
## Defining Inputs
The Tensorflow Object Detection API accepts inputs in the TFRecord file format.
Users must specify the locations of both the training and evaluation files.
Users should also specify a label map, which defines the mapping
between a class id and class name. The label map should be identical between
training and evaluation datasets.
An example input configuration looks as follows:
```
tf_record_input_reader {
input_path: "/usr/home/username/data/train.record"
}
label_map_path: "/usr/home/username/data/label_map.pbtxt"
```
Users should substitute the `input_path` and `label_map_path` arguments and
insert the input configuration into the `train_input_reader` and
`eval_input_reader` fields in the skeleton configuration. Note that the paths
can also point to Google Cloud Storage buckets (e.g.
"gs://project_bucket/train.record") for use on Google Cloud.
## Configuring the Trainer
The `train_config` defines parts of the training process:
1. Model parameter initialization.
2. Input preprocessing.
3. SGD parameters.
A sample `train_config` is below:
```
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0002
schedule {
step: 0
learning_rate: .0002
}
schedule {
step: 900000
learning_rate: .00002
}
schedule {
step: 1200000
learning_rate: .000002
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
fine_tune_checkpoint: "/usr/home/username/tmp/model.ckpt-#####"
from_detection_checkpoint: true
gradient_clipping_by_norm: 10.0
data_augmentation_options {
random_horizontal_flip {
}
}
```
### Model Parameter Initialization
While optional, it is highly recommended that users utilize existing object
detection checkpoints, since training an object detector from scratch can take
days. To speed up the training process, users can re-use the
feature extractor parameters from a pre-existing object classification or
detection checkpoint. `train_config` provides two fields to specify
pre-existing checkpoints: `fine_tune_checkpoint` and
`from_detection_checkpoint`. `fine_tune_checkpoint` should provide a path to
the pre-existing checkpoint
(ie:"/usr/home/username/checkpoint/model.ckpt-#####").
`from_detection_checkpoint` is a boolean value. If false, it assumes the
checkpoint was from an object classification checkpoint. Note that starting
from a detection checkpoint will usually result in a faster training job than
a classification checkpoint.
The list of provided checkpoints can be found [here](detection_model_zoo.md).
### Input Preprocessing
The `data_augmentation_options` in `train_config` can be used to specify
how training data should be modified. This field is optional.
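For example, the sample `train_config` above enables random horizontal flips.
Additional augmentation steps are listed as extra `data_augmentation_options`
blocks; the available operations are defined in
`object_detection/protos/preprocessor.proto` (the `ssd_random_crop` option below
is one such operation and is shown only as an illustration):

```
data_augmentation_options {
  random_horizontal_flip {
  }
}
data_augmentation_options {
  ssd_random_crop {
  }
}
```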
### SGD Parameters
The remaining parameters in `train_config` are hyperparameters for gradient
descent. Please note that the optimal learning rates provided in these
configuration files may depend on the specifics of the training setup (e.g.
number of workers, gpu type).
## Configuring the Evaluator
Currently evaluation is fixed to generating metrics as defined by the PASCAL
VOC challenge. The parameters for `eval_config` are set to reasonable defaults
and typically do not need to be configured.
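A minimal `eval_config` usually only needs the number of evaluation examples.
The fields below are the ones read by `object_detection/evaluator.py` and
`object_detection/eval_util.py` in this codebase; the values shown are
illustrative:

```
eval_config: {
  num_examples: 2000
  # How often (in seconds) a new checkpoint is evaluated.
  eval_interval_secs: 300
  # Number of images to visualize in Tensorboard summaries.
  num_visualizations: 10
}
```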
# So you want to create a new model!
In this section, we discuss some of the abstractions that we use
for defining detection models. If you would like to define a new model
architecture for detection and use it in the Tensorflow Detection API,
then this section should also serve as a high level guide to the files that you
will need to edit to get your new model working.
## DetectionModels (`object_detection/core/model.py`)
In order to be trained, evaluated, and exported for serving using our
provided binaries, all models under the Tensorflow Object Detection API must
implement the `DetectionModel` interface (see the full definition in `object_detection/core/model.py`). In particular,
each of these models is responsible for implementing 5 functions:
* `preprocess`: Run any preprocessing (e.g., scaling/shifting/reshaping) of
input values that is necessary prior to running the detector on an input
image.
* `predict`: Produce “raw” prediction tensors that can be passed to loss or
postprocess functions.
* `postprocess`: Convert predicted output tensors to final detections.
* `loss`: Compute scalar loss tensors with respect to provided groundtruth.
* `restore`: Load a checkpoint into the Tensorflow graph.
Given a `DetectionModel` at training time, we pass each image batch through
the following sequence of functions to compute a loss which can be optimized via
SGD:
```
inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
```
And at eval time, we pass each image batch through the following sequence of
functions to produce a set of detections:
```
inputs (images tensor) -> preprocess -> predict -> postprocess ->
outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
```
Some conventions to be aware of:
* `DetectionModel`s should make no assumptions about the input size or aspect
ratio --- they are responsible for doing any resize/reshaping necessary
(see docstring for the `preprocess` function).
* Output classes are always integers in the range `[0, num_classes)`.
Any mapping of these integers to semantic labels is to be handled outside
of this class. We never explicitly emit a “background class” --- thus 0 is
the first non-background class and any logic of predicting and removing
implicit background classes must be handled internally by the implementation.
* Detected boxes are to be interpreted as being in
`[y_min, x_min, y_max, x_max]` format and normalized relative to the
image window.
* We do not specifically assume any kind of probabilistic interpretation of the
scores --- the only important thing is their relative ordering. Thus
implementations of the postprocess function are free to output logits,
probabilities, calibrated probabilities, or anything else.
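To make the shape of this interface concrete, here is a minimal, purely
illustrative sketch of the five functions. It deliberately does not subclass the
real `DetectionModel` base class (which actual implementations must do, matching
its constructor and method signatures), and all tensor shapes are placeholders:

```python
import tensorflow as tf


class ToyDetectionModel(object):
  """Illustrative stand-in for the DetectionModel interface; not a real model."""

  def preprocess(self, inputs):
    # Scale uint8 pixel values to [-1, 1]; a real model may also resize here.
    return (tf.to_float(inputs) / 127.5) - 1.0

  def predict(self, preprocessed_inputs):
    # Produce "raw" prediction tensors that loss() and postprocess() consume.
    batch_size = tf.shape(preprocessed_inputs)[0]
    return {'box_encodings': tf.zeros([batch_size, 10, 4]),
            'class_logits': tf.zeros([batch_size, 10, 3])}

  def postprocess(self, prediction_dict):
    # Convert raw predictions into final detections in the agreed format:
    # normalized [y_min, x_min, y_max, x_max] boxes, scores and classes.
    return {'detection_boxes': tf.zeros([1, 10, 4]),
            'detection_scores': tf.zeros([1, 10]),
            'detection_classes': tf.zeros([1, 10]),
            'num_detections': tf.constant([10.0])}

  def loss(self, prediction_dict):
    # Compute scalar loss tensors against provided groundtruth (omitted here).
    return {'localization_loss': tf.constant(0.0),
            'classification_loss': tf.constant(0.0)}

  def restore(self, checkpoint_path):
    # Load a checkpoint into the Tensorflow graph (no-op in this sketch).
    pass
```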
## Defining a new Faster R-CNN or SSD Feature Extractor
In most cases, you probably will not implement a `DetectionModel` from scratch
--- instead you might create a new feature extractor to be used by one of the
SSD or Faster R-CNN meta-architectures. (We think of meta-architectures as
classes that define entire families of models using the `DetectionModel`
abstraction).
Note: For the following discussion to make sense, we recommend first becoming
familiar with the [Faster R-CNN](https://arxiv.org/abs/1506.01497) paper.
Let’s now imagine that you have invented a brand new network architecture
(say, “InceptionV100”) for classification and want to see how InceptionV100
would behave as a feature extractor for detection (say, with Faster R-CNN).
A similar procedure would hold for SSD models, but we’ll discuss Faster R-CNN.
To use InceptionV100, we will have to define a new
`FasterRCNNFeatureExtractor` and pass it to our `FasterRCNNMetaArch`
constructor as input. See
`object_detection/meta_architectures/faster_rcnn_meta_arch.py` for definitions
of `FasterRCNNFeatureExtractor` and `FasterRCNNMetaArch`, respectively.
A `FasterRCNNFeatureExtractor` must define a few
functions:
* `preprocess`: Run any preprocessing of input values that is necessary prior
to running the detector on an input image.
* `_extract_proposal_features`: Extract first stage Region Proposal Network
(RPN) features.
* `_extract_box_classifier_features`: Extract second stage Box Classifier
features.
* `restore_from_classification_checkpoint_fn`: Load a checkpoint into the
Tensorflow graph.
See the `object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py`
definition as one example. Some remarks:
* We typically initialize the weights of this feature extractor
using those from the
[Slim Resnet-101 classification checkpoint](https://github.com/tensorflow/models/tree/master/slim#pre-trained-models),
and we know
that images were preprocessed when training this checkpoint
by subtracting a channel mean from each input
image. Thus, we implement the preprocess function to replicate the same
channel mean subtraction behavior.
* The “full” resnet classification network defined in slim is cut into two
parts --- all but the last “resnet block” is put into the
`_extract_proposal_features` function and the final block is separately
defined in the `_extract_box_classifier_features` function. In general,
some experimentation may be required to decide on an optimal layer at
which to “cut” your feature extractor into these two pieces for Faster R-CNN.
## Register your model for configuration
Assuming that your new feature extractor does not require nonstandard
configuration, ideally you will only need to change the
“feature_extractor.type” fields in your configuration protos to point to the
new feature extractor. In order for our API to know how to understand this
new type though, you will first have to register your new feature
extractor with the model builder (`object_detection/builders/model_builder.py`),
whose job is to create models from config protos.
Registration is simple --- just add a pointer to the new Feature Extractor
class that you have defined in one of the SSD or Faster R-CNN Feature
Extractor Class maps at the top of the
`object_detection/builders/model_builder.py` file.
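Once registered, selecting the new extractor from a training or eval job is just
a one-line change in the pipeline config; for example (the type name below is
hypothetical and must match the key you added to the class map):

```
model {
  faster_rcnn {
    feature_extractor {
      type: 'faster_rcnn_inception_v100'
    }
    (... rest of the faster_rcnn config ...)
  }
}
```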
We recommend adding a test in `object_detection/builders/model_builder_test.py`
to make sure that parsing your proto will work as expected.
## Taking your new model for a spin
After registration you are ready to go with your model! Some final tips:
* To save time debugging, try running your configuration file locally first
(both training and evaluation).
* Do a sweep of learning rates to figure out which learning rate is best
for your model.
* A small but often important detail: you may find it necessary to disable
batchnorm training (that is, load the batch norm parameters from the
classification checkpoint, but do not update them during gradient descent).
# Tensorflow detection model zoo
We provide a collection of detection models pre-trained on the
[COCO dataset](http://mscoco.org).
These models can be useful for out-of-the-box inference if you are interested
in categories already in COCO (e.g., humans, cars, etc).
They are also useful for initializing your models when training on novel
datasets.
In the table below, we list each such pre-trained model including:
* a model name that corresponds to a config file that was used to train this
model in the `samples/configs` directory,
* a download link to a tar.gz file containing the pre-trained model,
* model speed (one of {slow, medium, fast}),
* detector performance on COCO data as measured by the COCO mAP measure.
Here, higher is better, and we only report bounding box mAP rounded to the
nearest integer.
* Output types (currently only `Boxes`)
You can un-tar each tar.gz file via, e.g.,:
```
tar -xzvf ssd_mobilenet_v1_coco.tar.gz
```
Inside the un-tar'ed directory, you will find:
* a graph proto (`graph.pbtxt`)
* a checkpoint
(`model.ckpt.data-00000-of-00001`, `model.ckpt.index`, `model.ckpt.meta`)
* a frozen graph proto with weights baked into the graph as constants
(`frozen_inference_graph.pb`) to be used for out of the box inference
(see the loading sketch below the table, or try this out in the Jupyter notebook!)
| Model name | Speed | COCO mAP | Outputs |
| ------------ | :--------------: | :--------------: | :-------------: |
| [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_11_06_2017.tar.gz) | fast | 21 | Boxes |
| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_11_06_2017.tar.gz) | fast | 24 | Boxes |
| [rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_11_06_2017.tar.gz) | medium | 30 | Boxes |
| [faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz) | medium | 32 | Boxes |
| [faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_11_06_2017.tar.gz) | slow | 37 | Boxes |
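To run one of these models out of the box, the frozen graph can be loaded
roughly as follows (a sketch; the path is a placeholder for your un-tar'ed
directory, and the tensor names are the defaults written by
`object_detection/exporter.py`):

```python
import numpy as np
import tensorflow as tf

# Placeholder path; point this at the frozen graph you extracted above.
PATH_TO_FROZEN_GRAPH = 'ssd_mobilenet_v1_coco/frozen_inference_graph.pb'

detection_graph = tf.Graph()
with detection_graph.as_default():
  graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    graph_def.ParseFromString(fid.read())
  tf.import_graph_def(graph_def, name='')

with tf.Session(graph=detection_graph) as sess:
  image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
  boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
  scores = detection_graph.get_tensor_by_name('detection_scores:0')
  classes = detection_graph.get_tensor_by_name('detection_classes:0')
  num_detections = detection_graph.get_tensor_by_name('num_detections:0')
  # Run detection on a dummy image; replace with a real uint8 image array.
  outputs = sess.run([boxes, scores, classes, num_detections],
                     feed_dict={image_tensor: np.zeros((1, 300, 300, 3),
                                                       dtype=np.uint8)})
```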
# Exporting a trained model for inference
After your model has been trained, you should export it to a Tensorflow
graph proto. A checkpoint will typically consist of three files:
* model.ckpt-${CHECKPOINT_NUMBER}.data-00000-of-00001
* model.ckpt-${CHECKPOINT_NUMBER}.index
* model.ckpt-${CHECKPOINT_NUMBER}.meta
After you've identified a candidate checkpoint to export, run the following
command from tensorflow/models:
``` bash
# From tensorflow/models
python object_detection/export_inference_graph.py \
--input_type image_tensor \
--pipeline_config_path ${PIPELINE_CONFIG_PATH} \
--checkpoint_path model.ckpt-${CHECKPOINT_NUMBER} \
--inference_graph_path output_inference_graph.pb
```
Afterwards, you should see a graph named output_inference_graph.pb.
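The same tool can also write a SavedModel instead of a frozen graph proto, using
the `export_as_saved_model` flag defined in
`object_detection/export_inference_graph.py`; in that case the output path is
used as the SavedModel directory:

``` bash
# From tensorflow/models
python object_detection/export_inference_graph.py \
    --input_type image_tensor \
    --pipeline_config_path ${PIPELINE_CONFIG_PATH} \
    --checkpoint_path model.ckpt-${CHECKPOINT_NUMBER} \
    --inference_graph_path output_saved_model \
    --export_as_saved_model=True
```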