Commit 47bc1813 authored by syiming

Merge remote-tracking branch 'upstream/master' into add_multilevel_crop_and_resize

parents d8611151 b035a227
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Beam pipeline to create COCO Camera Traps Object Detection TFRecords.
Please note that this tool creates sharded output files.
This tool assumes the input annotations are in the COCO Camera Traps json
format, specified here:
https://github.com/Microsoft/CameraTraps/blob/master/data_management/README.md
Example usage:
python create_cococameratraps_tfexample_main.py \
--alsologtostderr \
--output_tfrecord_prefix="/path/to/output/tfrecord/location/prefix" \
--image_directory="/path/to/image/folder/" \
--input_annotations_file="path/to/annotations.json"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import json
import logging
import os
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import PIL.Image
import tensorflow.compat.v1 as tf
from apache_beam import runners
from object_detection.utils import dataset_util
flags.DEFINE_string('image_directory', None, 'Directory where images are '
'stored')
flags.DEFINE_string('output_tfrecord_prefix', None,
'TFRecord containing images in tf.Example format.')
flags.DEFINE_string('input_annotations_file', None, 'Path to COCO-CameraTraps '
'style annotations file.')
flags.DEFINE_integer('num_images_per_shard',
200,
'The number of images to be stored in each shard.')
FLAGS = flags.FLAGS
class ParseImage(beam.DoFn):
"""A DoFn that parses a COCO-CameraTraps json and emits TFRecords."""
def __init__(self, image_directory, images, annotations, categories,
keep_bboxes):
"""Initialization function.
Args:
image_directory: Path to image directory
images: list of COCO Camera Traps style image dictionaries
annotations: list of COCO Camera Traps style annotation dictionaries
categories: list of COCO Camera Traps style category dictionaries
keep_bboxes: Whether to keep any bounding boxes that exist in the
annotations
"""
self._image_directory = image_directory
self._image_dict = {im['id']: im for im in images}
self._annotation_dict = {im['id']: [] for im in images}
self._category_dict = {int(cat['id']): cat for cat in categories}
for ann in annotations:
self._annotation_dict[ann['image_id']].append(ann)
self._images = images
self._keep_bboxes = keep_bboxes
self._num_examples_processed = beam.metrics.Metrics.counter(
'cococameratraps_data_generation', 'num_tf_examples_processed')
def process(self, image_id):
"""Builds a tf.Example given an image id.
Args:
image_id: the image id of the associated image
Returns:
List of tf.Examples.
"""
image = self._image_dict[image_id]
annotations = self._annotation_dict[image_id]
image_height = image['height']
image_width = image['width']
filename = image['file_name']
image_id = image['id']
image_location_id = image['location']
image_datetime = str(image['date_captured'])
image_sequence_id = str(image['seq_id'])
image_sequence_num_frames = int(image['seq_num_frames'])
image_sequence_frame_num = int(image['frame_num'])
full_path = os.path.join(self._image_directory, filename)
try:
# Ensure the image exists and is not corrupted
with tf.io.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
# Ensure the image can be read by tf
with tf.Graph().as_default():
image = tf.image.decode_jpeg(encoded_jpg, channels=3)
init_op = tf.tables_initializer()
with tf.Session() as sess:
sess.run(init_op)
sess.run(image)
except Exception as e: # pylint: disable=broad-except
# The image file is missing or corrupt
tf.logging.error(str(e))
return []
key = hashlib.sha256(encoded_jpg).hexdigest()
feature_dict = {
'image/height':
dataset_util.int64_feature(image_height),
'image/width':
dataset_util.int64_feature(image_width),
'image/filename':
dataset_util.bytes_feature(filename.encode('utf8')),
'image/source_id':
dataset_util.bytes_feature(str(image_id).encode('utf8')),
'image/key/sha256':
dataset_util.bytes_feature(key.encode('utf8')),
'image/encoded':
dataset_util.bytes_feature(encoded_jpg),
'image/format':
dataset_util.bytes_feature('jpeg'.encode('utf8')),
'image/location':
dataset_util.bytes_feature(str(image_location_id).encode('utf8')),
'image/seq_num_frames':
dataset_util.int64_feature(image_sequence_num_frames),
'image/seq_frame_num':
dataset_util.int64_feature(image_sequence_frame_num),
'image/seq_id':
dataset_util.bytes_feature(image_sequence_id.encode('utf8')),
'image/date_captured':
dataset_util.bytes_feature(image_datetime.encode('utf8'))
}
num_annotations_skipped = 0
if annotations:
xmin = []
xmax = []
ymin = []
ymax = []
category_names = []
category_ids = []
area = []
for object_annotations in annotations:
if 'bbox' in object_annotations and self._keep_bboxes:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
num_annotations_skipped += 1
continue
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
if 'area' in object_annotations:
area.append(object_annotations['area'])
else:
# Approximate the object area as half the box area (w * h / 2).
area.append(width*height/2.0)
category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
category_names.append(
self._category_dict[category_id]['name'].encode('utf8'))
feature_dict.update({
'image/object/bbox/xmin':
dataset_util.float_list_feature(xmin),
'image/object/bbox/xmax':
dataset_util.float_list_feature(xmax),
'image/object/bbox/ymin':
dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax':
dataset_util.float_list_feature(ymax),
'image/object/class/text':
dataset_util.bytes_list_feature(category_names),
'image/object/class/label':
dataset_util.int64_list_feature(category_ids),
'image/object/area':
dataset_util.float_list_feature(area),
})
# For classification, add the first category to image/class/label and
# image/class/text
if not category_ids:
feature_dict.update({
'image/class/label':
dataset_util.int64_list_feature([0]),
'image/class/text':
dataset_util.bytes_list_feature(['empty'.encode('utf8')]),
})
else:
feature_dict.update({
'image/class/label':
dataset_util.int64_list_feature([category_ids[0]]),
'image/class/text':
dataset_util.bytes_list_feature([category_names[0]]),
})
else:
# Add empty class if there are no annotations
feature_dict.update({
'image/class/label':
dataset_util.int64_list_feature([0]),
'image/class/text':
dataset_util.bytes_list_feature(['empty'.encode('utf8')]),
})
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
self._num_examples_processed.inc(1)
return [example]
def _load_json_data(data_file):
with tf.io.gfile.GFile(data_file, 'r') as fid:
data_dict = json.load(fid)
return data_dict
def create_pipeline(image_directory,
input_annotations_file,
output_tfrecord_prefix=None,
num_images_per_shard=200,
keep_bboxes=True):
"""Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.
Args:
image_directory: Path to image directory
input_annotations_file: Path to a coco-cameratraps annotation file
output_tfrecord_prefix: Absolute path for tfrecord outputs. Final shards will
be named {output_tfrecord_prefix}-?????-of-?????.
num_images_per_shard: The number of images to store in each shard
keep_bboxes: Whether to keep any bounding boxes that exist in the json file
Returns:
A Beam pipeline.
"""
logging.info('Reading data from COCO-CameraTraps Dataset.')
data = _load_json_data(input_annotations_file)
num_shards = int(np.ceil(float(len(data['images']))/num_images_per_shard))
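# E.g. 1001 images at 200 images per shard yields ceil(1001/200) = 6 shards.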
def pipeline(root):
"""Builds beam pipeline."""
image_examples = (
root
| ('CreateCollections') >> beam.Create(
[im['id'] for im in data['images']])
| ('ParseImage') >> beam.ParDo(ParseImage(
image_directory, data['images'], data['annotations'],
data['categories'], keep_bboxes=keep_bboxes)))
_ = (image_examples
| ('Reshuffle') >> beam.Reshuffle()
| ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord_prefix,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example)))
return pipeline
def main(_):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
"""
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.output_tfrecord_prefix)
tf.io.gfile.makedirs(dirname)
runner.run(
create_pipeline(
image_directory=FLAGS.image_directory,
input_annotations_file=FLAGS.input_annotations_file,
output_tfrecord_prefix=FLAGS.output_tfrecord_prefix,
num_images_per_shard=FLAGS.num_images_per_shard))
if __name__ == '__main__':
flags.mark_flags_as_required([
'image_directory',
'input_annotations_file',
'output_tfrecord_prefix'
])
app.run(main)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for create_cococameratraps_tfexample_main."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import json
import os
import tempfile
import unittest
import numpy as np
from PIL import Image
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
from object_detection.utils import tf_version
from apache_beam import runners
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
IMAGE_HEIGHT = 360
IMAGE_WIDTH = 480
def _write_random_images_to_directory(self, directory, num_frames):
for frame_num in range(num_frames):
img = np.random.randint(0, high=256,
size=(self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 3),
dtype=np.uint8)
pil_image = Image.fromarray(img)
fname = 'im_' + str(frame_num) + '.jpg'
pil_image.save(os.path.join(directory, fname), 'JPEG')
def _create_json_file(self, directory, num_frames, keep_bboxes=False):
json_dict = {'images': [], 'annotations': []}
json_dict['categories'] = [{'id': 0, 'name': 'empty'},
{'id': 1, 'name': 'animal'}]
for idx in range(num_frames):
im = {'id': 'im_' + str(idx),
'file_name': 'im_' + str(idx) + '.jpg',
'height': self.IMAGE_HEIGHT,
'width': self.IMAGE_WIDTH,
'seq_id': 'seq_1',
'seq_num_frames': num_frames,
'frame_num': idx,
'location': 'loc_' + str(idx),
'date_captured': str(datetime.datetime.now())
}
json_dict['images'].append(im)
ann = {'id': 'ann' + str(idx),
'image_id': 'im_' + str(idx),
'category_id': 1,
}
if keep_bboxes:
ann['bbox'] = [0.0 * self.IMAGE_WIDTH,
0.1 * self.IMAGE_HEIGHT,
0.5 * self.IMAGE_WIDTH,
0.5 * self.IMAGE_HEIGHT]
json_dict['annotations'].append(ann)
json_path = os.path.join(directory, 'test_file.json')
with tf.io.gfile.GFile(json_path, 'w') as f:
json.dump(json_dict, f)
return json_path
def assert_expected_example_bbox(self, example):
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.1])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.0])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.6])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.5])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'animal'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value,
[self.IMAGE_HEIGHT])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value,
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def assert_expected_example(self, example):
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'animal'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value,
[self.IMAGE_HEIGHT])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value,
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_beam_pipeline(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline = create_cococameratraps_tfexample_main.create_pipeline(
temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord)
runner.run(pipeline)
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), num_frames)
self.assert_expected_example(tf.train.Example.FromString(
actual_output[0]))
def test_beam_pipeline_bbox(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames, keep_bboxes=True)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline = create_cococameratraps_tfexample_main.create_pipeline(
temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord,
keep_bboxes=True)
runner.run(pipeline)
filenames = tf.io.gfile.glob(output_tfrecord+'-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), num_frames)
self.assert_expected_example_bbox(tf.train.Example.FromString(
actual_output[0]))
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""A Beam job to generate detection data for camera trap images.
This tool runs inference with an exported Object Detection model in
`saved_model` format and produces raw detection boxes for images in
tf.Examples, under the assumption that each bounding box class label will
match the image-level class label in the tf.Example.
Steps to generate a detection dataset:
1. Use object_detection/export_inference_graph.py to get a `saved_model` for
inference. The input node must accept a tf.Example proto.
2. Run this tool with the `saved_model` from step 1 and a TFRecord of tf.Example
protos containing images for inference.
Example Usage:
--------------
python tensorflow_models/object_detection/export_inference_graph.py \
--alsologtostderr \
--input_type tf_example \
--pipeline_config_path path/to/detection_model.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
python generate_detection_data.py \
--alsologtostderr \
--input_tfrecord path/to/input_tfrecord@X \
--output_tfrecord path/to/output_tfrecord@X \
--model_dir path/to/exported_model_directory/saved_model
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import threading
from absl import app
from absl import flags
import apache_beam as beam
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('detection_input_tfrecord', None, 'TFRecord containing '
'images in tf.Example format for object detection.')
flags.DEFINE_string('detection_output_tfrecord', None,
'TFRecord containing detections in tf.Example format.')
flags.DEFINE_string('detection_model_dir', None, 'Path to directory containing '
'an object detection SavedModel.')
flags.DEFINE_float('confidence_threshold', 0.9,
'Min confidence to keep bounding boxes')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateDetectionDataFn(beam.DoFn):
"""Generates detection data for camera trap images.
This Beam DoFn performs inference with an object detection `saved_model` and
produces detection boxes for camera trap data, matched to the
object class.
"""
session_lock = threading.Lock()
def __init__(self, model_dir, confidence_threshold):
"""Initialization function.
Args:
model_dir: A directory containing the exported SavedModel.
confidence_threshold: The confidence threshold for boxes to keep.
"""
self._model_dir = model_dir
self._confidence_threshold = confidence_threshold
self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter(
'detection_data_generation', 'num_tf_examples_processed')
def start_bundle(self):
self._load_inference_model()
def _load_inference_model(self):
# Because initialization of the tf.Session is expensive we share
# one instance across all threads in the worker. This is possible since
# tf.Session.run() is thread safe.
with self.session_lock:
if self._session is None:
graph = tf.Graph()
self._session = tf.Session(graph=graph)
with graph.as_default():
meta_graph = tf.saved_model.loader.load(
self._session, [tf.saved_model.tag_constants.SERVING],
self._model_dir)
signature = meta_graph.signature_def['serving_default']
input_tensor_name = signature.inputs['inputs'].name
self._input = graph.get_tensor_by_name(input_tensor_name)
self._boxes_node = graph.get_tensor_by_name(
signature.outputs['detection_boxes'].name)
self._scores_node = graph.get_tensor_by_name(
signature.outputs['detection_scores'].name)
self._num_detections_node = graph.get_tensor_by_name(
signature.outputs['num_detections'].name)
def process(self, tfrecord_entry):
return self._run_inference_and_generate_detections(tfrecord_entry)
def _run_inference_and_generate_detections(self, tfrecord_entry):
input_example = tf.train.Example.FromString(tfrecord_entry)
if input_example.features.feature[
'image/object/bbox/ymin'].float_list.value:
# There are already ground truth boxes for this image, just keep them.
return [input_example]
detection_boxes, detection_scores, num_detections = self._session.run(
[self._boxes_node, self._scores_node, self._num_detections_node],
feed_dict={self._input: [tfrecord_entry]})
example = tf.train.Example()
num_detections = int(num_detections[0])
image_class_labels = input_example.features.feature[
'image/object/class/label'].int64_list.value
image_class_texts = input_example.features.feature[
'image/object/class/text'].bytes_list.value
# Ignore any images with multiple classes, since we can't match the class
# to a specific box.
if len(image_class_labels) > 1:
return []
# Don't add boxes for images already labeled empty (for now)
if len(image_class_labels) == 1:
# Add boxes over confidence threshold.
for idx, score in enumerate(detection_scores[0]):
if score >= self._confidence_threshold and idx < num_detections:
example.features.feature[
'image/object/bbox/ymin'].float_list.value.extend([
detection_boxes[0, idx, 0]])
example.features.feature[
'image/object/bbox/xmin'].float_list.value.extend([
detection_boxes[0, idx, 1]])
example.features.feature[
'image/object/bbox/ymax'].float_list.value.extend([
detection_boxes[0, idx, 2]])
example.features.feature[
'image/object/bbox/xmax'].float_list.value.extend([
detection_boxes[0, idx, 3]])
# Add box scores and class texts and labels.
example.features.feature[
'image/object/class/score'].float_list.value.extend(
[score])
example.features.feature[
'image/object/class/label'].int64_list.value.extend(
[image_class_labels[0]])
example.features.feature[
'image/object/class/text'].bytes_list.value.extend(
[image_class_texts[0]])
# Add other essential example attributes
example.features.feature['image/encoded'].bytes_list.value.extend(
input_example.features.feature['image/encoded'].bytes_list.value)
example.features.feature['image/height'].int64_list.value.extend(
input_example.features.feature['image/height'].int64_list.value)
example.features.feature['image/width'].int64_list.value.extend(
input_example.features.feature['image/width'].int64_list.value)
example.features.feature['image/source_id'].bytes_list.value.extend(
input_example.features.feature['image/source_id'].bytes_list.value)
example.features.feature['image/location'].bytes_list.value.extend(
input_example.features.feature['image/location'].bytes_list.value)
example.features.feature['image/date_captured'].bytes_list.value.extend(
input_example.features.feature['image/date_captured'].bytes_list.value)
example.features.feature['image/class/text'].bytes_list.value.extend(
input_example.features.feature['image/class/text'].bytes_list.value)
example.features.feature['image/class/label'].int64_list.value.extend(
input_example.features.feature['image/class/label'].int64_list.value)
example.features.feature['image/seq_id'].bytes_list.value.extend(
input_example.features.feature['image/seq_id'].bytes_list.value)
example.features.feature['image/seq_num_frames'].int64_list.value.extend(
input_example.features.feature['image/seq_num_frames'].int64_list.value)
example.features.feature['image/seq_frame_num'].int64_list.value.extend(
input_example.features.feature['image/seq_frame_num'].int64_list.value)
self._num_examples_processed.inc(1)
return [example]
def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
confidence_threshold, num_shards):
"""Returns a Beam pipeline to run object detection inference.
Args:
input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
model_dir: Path to `saved_model` to use for inference.
confidence_threshold: Threshold to use when keeping detection results.
num_shards: The number of output shards.
Returns:
pipeline: A Beam pipeline.
"""
def pipeline(root):
input_collection = (
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'RunInference' >> beam.ParDo(
GenerateDetectionDataFn(model_dir, confidence_threshold))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
return pipeline
def main(_):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
"""
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.detection_output_tfrecord)
tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.detection_input_tfrecord,
FLAGS.detection_output_tfrecord,
FLAGS.detection_model_dir,
FLAGS.confidence_threshold,
FLAGS.num_shards))
if __name__ == '__main__':
flags.mark_flags_as_required([
'detection_input_tfrecord',
'detection_output_tfrecord',
'detection_model_dir'
])
app.run(main)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for generate_detection_data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
from object_detection import exporter
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_detection_data
from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
mock = unittest.mock
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
def preprocess(self, inputs):
true_image_shapes = [] # Doesn't matter for the fake model.
return tf.identity(inputs), true_image_shapes
def predict(self, preprocessed_inputs, true_image_shapes):
return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)}
def postprocess(self, prediction_dict, true_image_shapes):
with tf.control_dependencies(list(prediction_dict.values())):
postprocessed_tensors = {
'detection_boxes': tf.constant([[[0.0, 0.1, 0.5, 0.6],
[0.5, 0.5, 0.8, 0.8]]], tf.float32),
'detection_scores': tf.constant([[0.95, 0.6]], tf.float32),
'detection_multiclass_scores': tf.constant([[[0.1, 0.7, 0.2],
[0.3, 0.1, 0.6]]],
tf.float32),
'detection_classes': tf.constant([[0, 1]], tf.float32),
'num_detections': tf.constant([2], tf.float32)
}
return postprocessed_tensors
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
def regularization_losses(self):
pass
def updates(self):
pass
@contextlib.contextmanager
def InMemoryTFRecord(entries):
temp = tempfile.NamedTemporaryFile(delete=False)
filename = temp.name
try:
with tf.python_io.TFRecordWriter(filename) as writer:
for value in entries:
writer.write(value)
yield filename
finally:
os.unlink(filename)
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class GenerateDetectionDataTest(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self, checkpoint_path):
"""A function to save checkpoint from a fake Detection Model.
Args:
checkpoint_path: Path to save checkpoint from Fake model.
"""
g = tf.Graph()
with g.as_default():
mock_model = FakeModel(num_classes=5)
preprocessed_inputs, true_image_shapes = mock_model.preprocess(
tf.placeholder(tf.float32, shape=[None, None, None, 3]))
predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
mock_model.postprocess(predictions, true_image_shapes)
tf.train.get_or_create_global_step()
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with self.test_session(graph=g) as sess:
sess.run(init)
saver.save(sess, checkpoint_path)
def _export_saved_model(self):
tmp_dir = self.get_temp_dir()
checkpoint_path = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path)
output_directory = os.path.join(tmp_dir, 'output')
saved_model_path = os.path.join(output_directory, 'saved_model')
tf.io.gfile.makedirs(output_directory)
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=5)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
outputs, placeholder_tensor = exporter.build_detection_graph(
input_type='tf_example',
detection_model=detection_model,
input_shape=None,
output_collection_name='inference_op',
graph_hook_fn=None)
output_node_names = ','.join(outputs.keys())
saver = tf.train.Saver()
input_saver_def = saver.as_saver_def()
frozen_graph_def = exporter.freeze_graph_with_def_protos(
input_graph_def=tf.get_default_graph().as_graph_def(),
input_saver_def=input_saver_def,
input_checkpoint=checkpoint_path,
output_node_names=output_node_names,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
output_graph='',
clear_devices=True,
initializer_nodes='')
exporter.write_saved_model(
saved_model_path=saved_model_path,
frozen_graph_def=frozen_graph_def,
inputs=placeholder_tensor,
outputs=outputs)
return saved_model_path
def _create_tf_example(self):
with self.test_session():
encoded_image = tf.image.encode_jpeg(
tf.constant(np.ones((4, 6, 3)).astype(np.uint8))).eval()
def BytesFeature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def Int64Feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': BytesFeature(encoded_image),
'image/source_id': BytesFeature(b'image_id'),
'image/height': Int64Feature(4),
'image/width': Int64Feature(6),
'image/object/class/label': Int64Feature(5),
'image/object/class/text': BytesFeature(b'hyena'),
'image/class/label': Int64Feature(5),
'image/class/text': BytesFeature(b'hyena'),
}))
return example.SerializeToString()
def assert_expected_example(self, example):
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.0])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.1])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.5])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.6])
self.assertAllClose(
example.features.feature['image/object/class/score']
.float_list.value, [0.95])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'hyena'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'hyena'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value, [4])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value, [6])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'image_id'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_generate_detection_data_fn(self):
saved_model_path = self._export_saved_model()
confidence_threshold = 0.8
inference_fn = generate_detection_data.GenerateDetectionDataFn(
saved_model_path, confidence_threshold)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/text']
.bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assertAllEqual(
output_example.features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(output_example.features.feature['image/width']
.int64_list.value, [6])
self.assert_expected_example(output_example)
def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model()
confidence_threshold = 0.8
num_shards = 1
pipeline = generate_detection_data.construct_pipeline(
input_tfrecord, output_tfrecord, saved_model_path,
confidence_threshold, num_shards)
runner.run(pipeline)
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), 1)
self.assert_expected_example(tf.train.Example.FromString(
actual_output[0]))
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""A Beam job to generate embedding data for camera trap images.
This tool runs inference with an exported Object Detection model in
`saved_model` format and produces raw embeddings for camera trap data. These
embeddings contain an object-centric feature embedding from Faster R-CNN, the
datetime that the image was taken (encoded as normalized year, month, day,
hour, and minute values), and the position of the object of interest. By
default, only the highest-scoring object embedding is included.
Steps to generate an embedding dataset:
1. Use object_detection/export_inference_graph.py to get a Faster R-CNN
`saved_model` for inference. The input node must accept a tf.Example proto.
2. Run this tool with the `saved_model` from step 1 and a TFRecord of tf.Example
protos containing images for inference.
Example Usage:
--------------
python tensorflow_models/object_detection/export_inference_graph.py \
--alsologtostderr \
--input_type tf_example \
--pipeline_config_path path/to/faster_rcnn_model.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
python generate_embedding_data.py \
--alsologtostderr \
--embedding_input_tfrecord path/to/input_tfrecords* \
--embedding_output_tfrecord path/to/output_tfrecords \
--embedding_model_dir path/to/exported_model_directory/saved_model
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import os
import threading
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('embedding_input_tfrecord', None, 'TFRecord containing '
'images in tf.Example format for object detection.')
flags.DEFINE_string('embedding_output_tfrecord', None,
'TFRecord containing embeddings in tf.Example format.')
flags.DEFINE_string('embedding_model_dir', None, 'Path to directory containing '
'an object detection SavedModel with '
'detection_box_classifier_features in the output.')
flags.DEFINE_integer('top_k_embedding_count', 1,
'The number of top k embeddings to add to the memory bank.'
)
flags.DEFINE_integer('bottom_k_embedding_count', 0,
'The number of bottom k embeddings to add to the memory '
'bank.')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateEmbeddingDataFn(beam.DoFn):
"""Generates embedding data for camera trap images.
This Beam DoFn performs inference with an object detection `saved_model` and
produces contextual embedding vectors.
"""
session_lock = threading.Lock()
def __init__(self, model_dir, top_k_embedding_count,
bottom_k_embedding_count):
"""Initialization function.
Args:
model_dir: A directory containing the exported SavedModel.
top_k_embedding_count: the number of high-confidence embeddings to store
bottom_k_embedding_count: the number of low-confidence embeddings to store
"""
self._model_dir = model_dir
self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter(
'embedding_data_generation', 'num_tf_examples_processed')
self._top_k_embedding_count = top_k_embedding_count
self._bottom_k_embedding_count = bottom_k_embedding_count
def start_bundle(self):
self._load_inference_model()
def _load_inference_model(self):
# Because initialization of the tf.Session is expensive we share
# one instance across all threads in the worker. This is possible since
# tf.Session.run() is thread safe.
with self.session_lock:
if self._session is None:
graph = tf.Graph()
self._session = tf.Session(graph=graph)
with graph.as_default():
meta_graph = tf.saved_model.loader.load(
self._session, [tf.saved_model.tag_constants.SERVING],
self._model_dir)
signature = meta_graph.signature_def['serving_default']
input_tensor_name = signature.inputs['inputs'].name
detection_features_name = signature.outputs['detection_features'].name
detection_boxes_name = signature.outputs['detection_boxes'].name
num_detections_name = signature.outputs['num_detections'].name
self._input = graph.get_tensor_by_name(input_tensor_name)
self._embedding_node = graph.get_tensor_by_name(detection_features_name)
self._box_node = graph.get_tensor_by_name(detection_boxes_name)
self._scores_node = graph.get_tensor_by_name(
signature.outputs['detection_scores'].name)
self._num_detections = graph.get_tensor_by_name(num_detections_name)
tf.logging.info(signature.outputs['detection_features'].name)
tf.logging.info(signature.outputs['detection_boxes'].name)
tf.logging.info(signature.outputs['num_detections'].name)
def process(self, tfrecord_entry):
return self._run_inference_and_generate_embedding(tfrecord_entry)
def _run_inference_and_generate_embedding(self, tfrecord_entry):
input_example = tf.train.Example.FromString(tfrecord_entry)
# Convert the date_captured datetime string to Unix time and store it.
def get_date_captured(example):
date_captured = datetime.datetime.strptime(
six.ensure_str(
example.features.feature[
'image/date_captured'].bytes_list.value[0]),
'%Y-%m-%d %H:%M:%S')
return date_captured
try:
date_captured = get_date_captured(input_example)
except Exception: # pylint: disable=broad-except
# we require date_captured to be available for all images
return []
def embed_date_captured(date_captured):
"""Encodes the datetime of the image."""
embedded_date_captured = []
month_max = 12.0
day_max = 31.0
hour_max = 24.0
minute_max = 60.0
min_year = 1990.0
max_year = 2030.0
year = (date_captured.year-min_year)/float(max_year-min_year)
embedded_date_captured.append(year)
month = (date_captured.month-1)/month_max
embedded_date_captured.append(month)
day = (date_captured.day-1)/day_max
embedded_date_captured.append(day)
hour = date_captured.hour/hour_max
embedded_date_captured.append(hour)
minute = date_captured.minute/minute_max
embedded_date_captured.append(minute)
return np.asarray(embedded_date_captured)
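# Worked example for embed_date_captured above: 2019-10-20 12:12:12 is
# encoded (seconds are dropped) as roughly
# [(2019-1990)/40, (10-1)/12, (20-1)/31, 12/24, 12/60]
# = [0.725, 0.75, 0.613, 0.5, 0.2].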
def embed_position_and_size(box):
"""Encodes the bounding box of the object of interest."""
ymin = box[0]
xmin = box[1]
ymax = box[2]
xmax = box[3]
w = xmax - xmin
h = ymax - ymin
x = xmin + w / 2.0
y = ymin + h / 2.0
return np.asarray([x, y, w, h])
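# Worked example for embed_position_and_size above: the box
# [ymin, xmin, ymax, xmax] = [0.0, 0.1, 0.5, 0.6] maps to
# [x, y, w, h] = [0.35, 0.25, 0.5, 0.5].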
unix_time = (
(date_captured - datetime.datetime.fromtimestamp(0)).total_seconds())
example = tf.train.Example()
example.features.feature['image/unix_time'].float_list.value.extend(
[unix_time])
(detection_features, detection_boxes, num_detections,
detection_scores) = self._session.run(
[
self._embedding_node, self._box_node, self._num_detections[0],
self._scores_node
],
feed_dict={self._input: [tfrecord_entry]})
num_detections = int(num_detections)
embed_all = []
score_all = []
detection_features = np.asarray(detection_features)
def get_bb_embedding(detection_features, detection_boxes, detection_scores,
index):
embedding = detection_features[0][index]
pooled_embedding = np.mean(np.mean(embedding, axis=1), axis=0)
box = detection_boxes[0][index]
position_embedding = embed_position_and_size(box)
score = detection_scores[0][index]
return np.concatenate((pooled_embedding, position_embedding)), score
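# get_bb_embedding above returns (num feature channels + 4) values: the
# detection feature map for the box is mean-pooled over its spatial grid
# and then concatenated with the box position/size encoding.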
temporal_embedding = embed_date_captured(date_captured)
embedding_count = 0
for index in range(min(num_detections, self._top_k_embedding_count)):
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, index)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
embedding_count += 1
for index in range(
max(0, num_detections - 1),
max(-1, num_detections - 1 - self._bottom_k_embedding_count), -1):
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, index)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
embedding_count += 1
if embedding_count == 0:
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, 0)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
# Takes max in case embedding_count is 0.
embedding_length = len(embed_all) // max(1, embedding_count)
embed_all = np.asarray(embed_all)
example.features.feature['image/embedding'].float_list.value.extend(
embed_all)
example.features.feature['image/embedding_score'].float_list.value.extend(
score_all)
example.features.feature['image/embedding_length'].int64_list.value.append(
embedding_length)
example.features.feature['image/embedding_count'].int64_list.value.append(
embedding_count)
# Add other essential example attributes
example.features.feature['image/encoded'].bytes_list.value.extend(
input_example.features.feature['image/encoded'].bytes_list.value)
example.features.feature['image/height'].int64_list.value.extend(
input_example.features.feature['image/height'].int64_list.value)
example.features.feature['image/width'].int64_list.value.extend(
input_example.features.feature['image/width'].int64_list.value)
example.features.feature['image/source_id'].bytes_list.value.extend(
input_example.features.feature['image/source_id'].bytes_list.value)
example.features.feature['image/location'].bytes_list.value.extend(
input_example.features.feature['image/location'].bytes_list.value)
example.features.feature['image/date_captured'].bytes_list.value.extend(
input_example.features.feature['image/date_captured'].bytes_list.value)
example.features.feature['image/class/text'].bytes_list.value.extend(
input_example.features.feature['image/class/text'].bytes_list.value)
example.features.feature['image/class/label'].int64_list.value.extend(
input_example.features.feature['image/class/label'].int64_list.value)
example.features.feature['image/seq_id'].bytes_list.value.extend(
input_example.features.feature['image/seq_id'].bytes_list.value)
example.features.feature['image/seq_num_frames'].int64_list.value.extend(
input_example.features.feature['image/seq_num_frames'].int64_list.value)
example.features.feature['image/seq_frame_num'].int64_list.value.extend(
input_example.features.feature['image/seq_frame_num'].int64_list.value)
example.features.feature['image/object/bbox/ymax'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/ymax'].float_list.value)
example.features.feature['image/object/bbox/ymin'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/ymin'].float_list.value)
example.features.feature['image/object/bbox/xmax'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/xmax'].float_list.value)
example.features.feature['image/object/bbox/xmin'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/xmin'].float_list.value)
example.features.feature[
'image/object/class/score'].float_list.value.extend(
input_example.features.feature[
'image/object/class/score'].float_list.value)
example.features.feature[
'image/object/class/label'].int64_list.value.extend(
input_example.features.feature[
'image/object/class/label'].int64_list.value)
example.features.feature[
'image/object/class/text'].bytes_list.value.extend(
input_example.features.feature[
'image/object/class/text'].bytes_list.value)
self._num_examples_processed.inc(1)
return [example]
def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
top_k_embedding_count, bottom_k_embedding_count,
num_shards):
"""Returns a beam pipeline to run object detection inference.
Args:
input_tfrecord: An TFRecord of tf.train.Example protos containing images.
output_tfrecord: An TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
model_dir: Path to `saved_model` to use for inference.
top_k_embedding_count: The number of high-confidence embeddings to store.
bottom_k_embedding_count: The number of low-confidence embeddings to store.
num_shards: The number of output shards.
"""
def pipeline(root):
input_collection = (
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
bottom_k_embedding_count))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
return pipeline
def main(_):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
"""
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.embedding_output_tfrecord)
tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.embedding_input_tfrecord,
FLAGS.embedding_output_tfrecord,
FLAGS.embedding_model_dir, FLAGS.top_k_embedding_count,
FLAGS.bottom_k_embedding_count, FLAGS.num_shards))
if __name__ == '__main__':
flags.mark_flags_as_required([
'embedding_input_tfrecord',
'embedding_output_tfrecord',
'embedding_model_dir'
])
app.run(main)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for generate_embedding_data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
from object_detection import exporter
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_embedding_data
from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
mock = unittest.mock
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
def preprocess(self, inputs):
true_image_shapes = [] # Doesn't matter for the fake model.
return tf.identity(inputs), true_image_shapes
def predict(self, preprocessed_inputs, true_image_shapes):
return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)}
def postprocess(self, prediction_dict, true_image_shapes):
with tf.control_dependencies(list(prediction_dict.values())):
num_features = 100
feature_dims = 10
classifier_feature = np.ones(
(2, feature_dims, feature_dims, num_features),
dtype=np.float32).tolist()
postprocessed_tensors = {
'detection_boxes': tf.constant([[[0.0, 0.1, 0.5, 0.6],
[0.5, 0.5, 0.8, 0.8]]], tf.float32),
'detection_scores': tf.constant([[0.95, 0.6]], tf.float32),
'detection_multiclass_scores': tf.constant([[[0.1, 0.7, 0.2],
[0.3, 0.1, 0.6]]],
tf.float32),
'detection_classes': tf.constant([[0, 1]], tf.float32),
'num_detections': tf.constant([2], tf.float32),
'detection_features':
tf.constant([classifier_feature],
tf.float32)
}
return postprocessed_tensors
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
def regularization_losses(self):
pass
def updates(self):
pass
@contextlib.contextmanager
def InMemoryTFRecord(entries):
temp = tempfile.NamedTemporaryFile(delete=False)
filename = temp.name
try:
with tf.python_io.TFRecordWriter(filename) as writer:
for value in entries:
writer.write(value)
yield filename
finally:
os.unlink(temp.name)
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class GenerateEmbeddingData(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self, checkpoint_path):
"""A function to save checkpoint from a fake Detection Model.
Args:
checkpoint_path: Path to save checkpoint from Fake model.
"""
g = tf.Graph()
with g.as_default():
mock_model = FakeModel(num_classes=5)
preprocessed_inputs, true_image_shapes = mock_model.preprocess(
tf.placeholder(tf.float32, shape=[None, None, None, 3]))
predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
mock_model.postprocess(predictions, true_image_shapes)
tf.train.get_or_create_global_step()
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with self.test_session(graph=g) as sess:
sess.run(init)
saver.save(sess, checkpoint_path)
def _export_saved_model(self):
tmp_dir = self.get_temp_dir()
checkpoint_path = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path)
output_directory = os.path.join(tmp_dir, 'output')
saved_model_path = os.path.join(output_directory, 'saved_model')
tf.io.gfile.makedirs(output_directory)
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=5)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
outputs, placeholder_tensor = exporter.build_detection_graph(
input_type='tf_example',
detection_model=detection_model,
input_shape=None,
output_collection_name='inference_op',
graph_hook_fn=None)
output_node_names = ','.join(outputs.keys())
saver = tf.train.Saver()
input_saver_def = saver.as_saver_def()
frozen_graph_def = exporter.freeze_graph_with_def_protos(
input_graph_def=tf.get_default_graph().as_graph_def(),
input_saver_def=input_saver_def,
input_checkpoint=checkpoint_path,
output_node_names=output_node_names,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
output_graph='',
clear_devices=True,
initializer_nodes='')
exporter.write_saved_model(
saved_model_path=saved_model_path,
frozen_graph_def=frozen_graph_def,
inputs=placeholder_tensor,
outputs=outputs)
return saved_model_path
def _create_tf_example(self):
with self.test_session():
encoded_image = tf.image.encode_jpeg(
tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).eval()
def BytesFeature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def Int64Feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def FloatFeature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': BytesFeature(encoded_image),
'image/source_id': BytesFeature(b'image_id'),
'image/height': Int64Feature(400),
'image/width': Int64Feature(600),
'image/class/label': Int64Feature(5),
'image/class/text': BytesFeature(b'hyena'),
'image/object/bbox/xmin': FloatFeature(0.1),
'image/object/bbox/xmax': FloatFeature(0.6),
'image/object/bbox/ymin': FloatFeature(0.0),
'image/object/bbox/ymax': FloatFeature(0.5),
'image/object/class/score': FloatFeature(0.95),
'image/object/class/label': Int64Feature(5),
'image/object/class/text': BytesFeature(b'hyena'),
'image/date_captured': BytesFeature(b'2019-10-20 12:12:12')
}))
return example.SerializeToString()
def assert_expected_example(self, example, topk=False, botk=False):
# Check embeddings
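# With the FakeModel's 100-channel detection features, each per-box
# embedding is 100 pooled values + 4 position values + 5 date values
# = 109 floats, so the top-k/bottom-k cases (2 boxes) expect 2 * 109 = 218.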
if topk or botk:
self.assertEqual(len(
example.features.feature['image/embedding'].float_list.value),
218)
self.assertAllEqual(
example.features.feature['image/embedding_count'].int64_list.value,
[2])
else:
self.assertEqual(len(
example.features.feature['image/embedding'].float_list.value),
109)
self.assertAllEqual(
example.features.feature['image/embedding_count'].int64_list.value,
[1])
self.assertAllEqual(
example.features.feature['image/embedding_length'].int64_list.value,
[109])
# Check annotations
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.0])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.1])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.5])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.6])
self.assertAllClose(
example.features.feature['image/object/class/score']
.float_list.value, [0.95])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'hyena'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'hyena'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value, [400])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value, [600])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'image_id'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_generate_embedding_data_fn(self):
saved_model_path = self._export_saved_model()
top_k_embedding_count = 1
bottom_k_embedding_count = 0
inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/text']
.bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assert_expected_example(output_example)
def test_generate_embedding_data_with_top_k_boxes(self):
saved_model_path = self._export_saved_model()
top_k_embedding_count = 2
bottom_k_embedding_count = 0
inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/label'].int64_list.value, [5])
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/text'].bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assert_expected_example(output_example, topk=True)
def test_generate_embedding_data_with_bottom_k_boxes(self):
saved_model_path = self._export_saved_model()
top_k_embedding_count = 0
bottom_k_embedding_count = 2
inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/label'].int64_list.value, [5])
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/text'].bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assert_expected_example(output_example, botk=True)
def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model()
top_k_embedding_count = 1
bottom_k_embedding_count = 0
num_shards = 1
pipeline = generate_embedding_data.construct_pipeline(
input_tfrecord, output_tfrecord, saved_model_path,
top_k_embedding_count, bottom_k_embedding_count, num_shards)
runner.run(pipeline)
filenames = tf.io.gfile.glob(
output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), 1)
self.assert_expected_example(tf.train.Example.FromString(
actual_output[0]))
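The glob pattern `'-?????-of-?????'` above matches TensorFlow's standard sharded-file suffix. As a rough sketch of reading the shards back outside the test (the output path is a hypothetical placeholder), one might do:

```python
# Sketch: reading back sharded TFRecord output. The path is hypothetical;
# '-?????-of-?????' is the standard TensorFlow sharding suffix.
import tensorflow.compat.v1 as tf

filenames = tf.io.gfile.glob('/tmp/output_tfrecord-?????-of-?????')
examples = []
for shard in filenames:
  for record in tf.python_io.tf_record_iterator(path=shard):
    examples.append(tf.train.Example.FromString(record))
print('Read %d examples from %d shards' % (len(examples), len(filenames)))
```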
if __name__ == '__main__':
tf.test.main()
......@@ -14,6 +14,9 @@
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
This tool supports data generation for object detection (boxes, masks),
keypoint detection, and DensePose.
Please note that this tool creates sharded output files.
Example usage:
......@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
'Training annotations JSON file.')
tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
'Validation annotations JSON file.')
# DensePose annotations are only available for COCO 2014.
tf.flags.DEFINE_string('train_densepose_annotations_file', '',
'Training annotations JSON file for DensePose.')
tf.flags.DEFINE_string('val_densepose_annotations_file', '',
'Validation annotations JSON file for DensePose.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
# Whether to keep only images/annotations for the person class (for the
# keypoint / DensePose tasks).
tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
'remove all annotations for non-person objects.')
tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
'remove all examples that do not contain a person.')
FLAGS = flags.FLAGS
......@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [
b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
]
_COCO_PART_NAMES = [
b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
b'left_face',
]
_DP_PART_ID_OFFSET = 1
def clip_to_unit(x):
return min(max(x, 0.0), 1.0)
def create_tf_example(image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=None):
keypoint_annotations_dict=None,
densepose_annotations_dict=None,
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Converts image and annotations to a tf.Example proto.
Args:
......@@ -108,10 +142,23 @@ def create_tf_example(image,
      dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
keypoint information for this person object annotation. If None, then
no keypoint annotations will be populated.
densepose_annotations_dict: A dictionary that maps from annotation_id to a
      dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', u'dp_U', u'dp_V']
representing part surface coordinates. For more information see
http://densepose.org/.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
Returns:
key: SHA256 hash of the image.
example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored.
num_keypoint_annotation_skipped: Number of keypoint annotations that were
skipped.
num_densepose_annotation_skipped: Number of DensePose annotations that were
skipped.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
......@@ -146,6 +193,16 @@ def create_tf_example(image,
num_annotations_skipped = 0
num_keypoint_annotation_used = 0
num_keypoint_annotation_skipped = 0
dp_part_index = []
dp_x = []
dp_y = []
dp_u = []
dp_v = []
dp_num_points = []
densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
include_densepose = densepose_annotations_dict is not None
num_densepose_annotation_used = 0
num_densepose_annotation_skipped = 0
for object_annotations in annotations_list:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
......@@ -154,14 +211,18 @@ def create_tf_example(image,
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
continue
category_id = int(object_annotations['category_id'])
category_name = category_index[category_id]['name'].encode('utf8')
if remove_non_person_annotations and category_name != b'person':
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
is_crowd.append(object_annotations['iscrowd'])
category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
category_names.append(category_index[category_id]['name'].encode('utf8'))
category_names.append(category_name)
area.append(object_annotations['area'])
if include_masks:
......@@ -197,6 +258,40 @@ def create_tf_example(image,
keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
keypoints_name.extend(_COCO_KEYPOINT_NAMES)
num_keypoints.append(0)
if include_densepose:
annotation_id = object_annotations['id']
if (annotation_id in densepose_annotations_dict and
all(key in densepose_annotations_dict[annotation_id]
for key in densepose_keys)):
dp_annotations = densepose_annotations_dict[annotation_id]
num_densepose_annotation_used += 1
dp_num_points.append(len(dp_annotations['dp_I']))
dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
for i in dp_annotations['dp_I']])
# DensePose surface coordinates are defined on a [256, 256] grid
# relative to each instance box (i.e. absolute coordinates in range
# [0., 256.]). The following converts the coordinates
# so that they are expressed in normalized image coordinates.
dp_x_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
dp_x_norm = [(float(x) + x_box_rel * width) / image_width
for x_box_rel in dp_x_box_rel]
dp_y_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
dp_y_norm = [(float(y) + y_box_rel * height) / image_height
for y_box_rel in dp_y_box_rel]
dp_x.extend(dp_x_norm)
dp_y.extend(dp_y_norm)
dp_u.extend(dp_annotations['dp_U'])
dp_v.extend(dp_annotations['dp_V'])
else:
dp_num_points.append(0)
if (remove_non_person_images and
not any(name == b'person' for name in category_names)):
return (key, None, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
feature_dict = {
'image/height':
dataset_util.int64_feature(image_height),
......@@ -243,15 +338,34 @@ def create_tf_example(image,
dataset_util.bytes_list_feature(keypoints_name))
num_keypoint_annotation_skipped = (
len(keypoint_annotations_dict) - num_keypoint_annotation_used)
if include_densepose:
feature_dict['image/object/densepose/num'] = (
dataset_util.int64_list_feature(dp_num_points))
feature_dict['image/object/densepose/part_index'] = (
dataset_util.int64_list_feature(dp_part_index))
feature_dict['image/object/densepose/x'] = (
dataset_util.float_list_feature(dp_x))
feature_dict['image/object/densepose/y'] = (
dataset_util.float_list_feature(dp_y))
feature_dict['image/object/densepose/u'] = (
dataset_util.float_list_feature(dp_u))
feature_dict['image/object/densepose/v'] = (
dataset_util.float_list_feature(dp_v))
num_densepose_annotation_skipped = (
len(densepose_annotations_dict) - num_densepose_annotation_used)
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return key, example, num_annotations_skipped, num_keypoint_annotation_skipped
return (key, example, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
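The DensePose surface-coordinate conversion above (together with `clip_to_unit`) can be checked by hand. A minimal standalone sketch, with made-up numbers mirroring the unit test (a 128-wide box at x=64 in a 256-pixel image); the helper name `densepose_x_to_image_norm` is illustrative, not part of this module:

```python
# Sketch of the DensePose x-coordinate conversion performed above.
# dp_x lives on a [0, 256] grid relative to the instance box, so the
# normalized image coordinate is
#   (box_x + clip(dp_x / 256) * box_width) / image_width.
def clip_to_unit(x):
  return min(max(x, 0.0), 1.0)

def densepose_x_to_image_norm(dp_x, box_x, box_width, image_width):
  x_box_rel = clip_to_unit(dp_x / 256.)
  return (float(box_x) + x_box_rel * box_width) / image_width

# A point at the center of the 256-unit grid inside a [64, 64, 128, 128]
# box of a 256x256 image lands at the box center: 0.5 in image coordinates.
assert densepose_x_to_image_norm(128., 64, 128, 256) == 0.5
```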
def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
output_path, include_masks,
num_shards,
keypoint_annotations_file=''):
keypoint_annotations_file='',
densepose_annotations_file='',
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Loads COCO annotation json files and converts to tf.Record format.
Args:
......@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_file: JSON file containing the person keypoint
annotations. If empty, then no person keypoint annotations will be
generated.
densepose_annotations_file: JSON file containing the DensePose annotations.
If empty, then no DensePose annotations will be generated.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
"""
with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid:
......@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
if image_id not in annotations_index:
missing_annotation_count += 1
annotations_index[image_id] = []
logging.info('%d images are missing annotations.', missing_annotation_count)
logging.info('%d images are missing annotations.',
missing_annotation_count)
keypoint_annotations_index = {}
if keypoint_annotations_file:
......@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_index[image_id] = {}
keypoint_annotations_index[image_id][annotation['id']] = annotation
densepose_annotations_index = {}
if densepose_annotations_file:
with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
densepose_groundtruth_data = json.load(fid)
if 'annotations' in densepose_groundtruth_data:
for annotation in densepose_groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in densepose_annotations_index:
densepose_annotations_index[image_id] = {}
densepose_annotations_index[image_id][annotation['id']] = annotation
total_num_annotations_skipped = 0
total_num_keypoint_annotations_skipped = 0
total_num_densepose_annotations_skipped = 0
for idx, image in enumerate(images):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(images))
......@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_dict = {}
if image['id'] in keypoint_annotations_index:
keypoint_annotations_dict = keypoint_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped,
num_keypoint_annotations_skipped) = create_tf_example(
densepose_annotations_dict = None
if densepose_annotations_file:
densepose_annotations_dict = {}
if image['id'] in densepose_annotations_index:
densepose_annotations_dict = densepose_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped, num_keypoint_annotations_skipped,
num_densepose_annotations_skipped) = create_tf_example(
image, annotations_list, image_dir, category_index, include_masks,
keypoint_annotations_dict)
keypoint_annotations_dict, densepose_annotations_dict,
remove_non_person_annotations, remove_non_person_images)
total_num_annotations_skipped += num_annotations_skipped
total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
total_num_densepose_annotations_skipped += (
num_densepose_annotations_skipped)
shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
if tf_example:
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
logging.info('Finished writing, skipped %d annotations.',
total_num_annotations_skipped)
if keypoint_annotations_file:
logging.info('Finished writing, skipped %d keypoint annotations.',
total_num_keypoint_annotations_skipped)
if densepose_annotations_file:
logging.info('Finished writing, skipped %d DensePose annotations.',
total_num_densepose_annotations_skipped)
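For reference, a hedged sketch of invoking the converter above with DensePose annotations enabled; all paths are hypothetical placeholders:

```python
# Sketch: converting COCO 2014 with DensePose annotations, keeping only
# person images/annotations. Paths are illustrative placeholders.
_create_tf_record_from_coco_annotations(
    annotations_file='/data/annotations/instances_train2014.json',
    image_dir='/data/train2014',
    output_path='/data/tfrecords/coco_train.record',
    include_masks=False,
    num_shards=100,
    densepose_annotations_file='/data/annotations/densepose_train2014.json',
    remove_non_person_annotations=True,
    remove_non_person_images=True)
```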
def main(_):
......@@ -347,20 +492,26 @@ def main(_):
train_output_path,
FLAGS.include_masks,
num_shards=100,
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file)
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file,
densepose_annotations_file=FLAGS.train_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.val_annotations_file,
FLAGS.val_image_dir,
val_output_path,
FLAGS.include_masks,
num_shards=100,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file)
num_shards=50,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file,
densepose_annotations_file=FLAGS.val_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.testdev_annotations_file,
FLAGS.test_image_dir,
testdev_output_path,
FLAGS.include_masks,
num_shards=100)
num_shards=50)
if __name__ == '__main__':
......
......@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index)
self.assertEqual(num_annotations_skipped, 0)
......@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index, include_masks=True)
self.assertEqual(num_annotations_skipped, 0)
......@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
}
(_, example, _,
num_keypoint_annotation_skipped) = create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict)
_, example, _, num_keypoint_annotation_skipped, _ = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict))
self.assertEqual(num_keypoint_annotation_skipped, 0)
self._assertProtoEqual(
......@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
example.features.feature[
'image/object/keypoint/visibility'].int64_list.value, vv)
def test_create_tf_example_with_dense_pose(self):
image_dir = self.get_temp_dir()
image_file_name = 'tmp_image.jpg'
image_data = np.random.randint(low=0, high=256, size=(256, 256, 3)).astype(
np.uint8)
save_path = os.path.join(image_dir, image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
image = {
'file_name': image_file_name,
'height': 256,
'width': 256,
'id': 11,
}
min_x, min_y = 64, 64
max_x, max_y = 128, 128
keypoints = []
num_visible_keypoints = 0
xv = []
yv = []
vv = []
for _ in range(17):
xc = min_x + int(np.random.rand()*(max_x - min_x))
yc = min_y + int(np.random.rand()*(max_y - min_y))
vis = np.random.randint(0, 3)
xv.append(xc)
yv.append(yc)
vv.append(vis)
keypoints.extend([xc, yc, vis])
num_visible_keypoints += (vis > 0)
annotations_list = [{
'area': 0.5,
'iscrowd': False,
'image_id': 11,
'bbox': [64, 64, 128, 128],
'category_id': 1,
'id': 1000
}]
num_points = 45
dp_i = np.random.randint(1, 25, (num_points,)).astype(np.float32)
dp_u = np.random.randn(num_points)
dp_v = np.random.randn(num_points)
dp_x = np.random.rand(num_points)*256.
dp_y = np.random.rand(num_points)*256.
densepose_annotations_dict = {
1000: {
'dp_I': dp_i,
'dp_U': dp_u,
'dp_V': dp_v,
'dp_x': dp_x,
'dp_y': dp_y,
'bbox': [64, 64, 128, 128],
}
}
category_index = {
1: {
'name': 'person',
'id': 1
}
}
_, example, _, _, num_densepose_annotation_skipped = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
densepose_annotations_dict=densepose_annotations_dict))
self.assertEqual(num_densepose_annotation_skipped, 0)
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[six.b(image_file_name)])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[six.b(str(image['id']))])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value,
[six.b('jpeg')])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
[six.b('person')])
self._assertProtoEqual(
example.features.feature['image/object/densepose/num'].int64_list.value,
[num_points])
self.assertAllEqual(
example.features.feature[
'image/object/densepose/part_index'].int64_list.value,
dp_i.astype(np.int64) - create_coco_tf_record._DP_PART_ID_OFFSET)
self.assertAllClose(
example.features.feature['image/object/densepose/u'].float_list.value,
dp_u)
self.assertAllClose(
example.features.feature['image/object/densepose/v'].float_list.value,
dp_v)
expected_dp_x = (64 + dp_x * 128. / 256.) / 256.
expected_dp_y = (64 + dp_y * 128. / 256.) / 256.
self.assertAllClose(
example.features.feature['image/object/densepose/x'].float_list.value,
expected_dp_x)
self.assertAllClose(
example.features.feature['image/object/densepose/y'].float_list.value,
expected_dp_y)
def test_create_sharded_tf_record(self):
tmp_dir = self.get_temp_dir()
image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
......
......@@ -24,10 +24,18 @@ import six
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import seq_example_util
from object_detection.utils import tf_version
class SeqExampleUtilTest(tf.test.TestCase):
def materialize_tensors(self, list_of_tensors):
if tf_version.is_tf2():
return [tensor.numpy() for tensor in list_of_tensors]
else:
with self.cached_session() as sess:
return sess.run(list_of_tensors)
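The helper above lets one test body run under both TF1 graph mode and TF2 eager mode. A minimal standalone sketch of the same bridging idea, assuming nothing beyond public TensorFlow APIs:

```python
# Sketch: materializing a tensor under either execution mode. Under TF2
# eager execution tensors expose .numpy(); under TF1 they must be
# evaluated inside a session.
import numpy as np
import tensorflow as tf

image = tf.constant(np.zeros((4, 4, 3), dtype=np.uint8))
encoded = tf.io.encode_jpeg(image)
if tf.executing_eagerly():
  encoded_bytes = encoded.numpy()
else:
  with tf.compat.v1.Session() as sess:
    encoded_bytes = sess.run(encoded)
assert isinstance(encoded_bytes, bytes)
```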
def test_make_unlabeled_example(self):
num_frames = 5
image_height = 100
......@@ -41,8 +49,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
image_source_ids = [str(idx) for idx in range(num_frames)]
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
encoded_images = self.materialize_tensors(encoded_images_list)
seq_example = seq_example_util.make_sequence_example(
dataset_name=dataset_name,
video_id=video_id,
......@@ -109,8 +116,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
dtype=tf.int32), dtype=tf.uint8)
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
encoded_images = self.materialize_tensors(encoded_images_list)
timestamps = [100000, 110000]
is_annotated = [1, 0]
bboxes = [
......@@ -208,8 +214,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
dtype=tf.int32), dtype=tf.uint8)
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
encoded_images = self.materialize_tensors(encoded_images_list)
bboxes = [
np.array([[0., 0., 0.75, 0.75],
[0., 0., 1., 1.]], dtype=np.float32),
......@@ -283,7 +288,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75, 1.],
seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:])
self.assertAllEqual(
['cat', 'frog'],
[b'cat', b'frog'],
seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:])
self.assertAllClose(
[0.],
......@@ -327,7 +332,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75],
seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:])
self.assertAllEqual(
['cat'],
[b'cat'],
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
self.assertAllClose(
[],
......
......@@ -42,7 +42,7 @@ class OpenOutputTfrecordsTests(tf.test.TestCase):
tf_record_path = '{}-{:05d}-of-00010'.format(
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
records = list(tf.python_io.tf_record_iterator(tf_record_path))
self.assertAllEqual(records, ['test_{}'.format(idx)])
self.assertAllEqual(records, ['test_{}'.format(idx).encode('utf-8')])
if __name__ == '__main__':
......
FROM tensorflow/tensorflow:1.15.2-gpu-py3
ARG DEBIAN_FRONTEND=noninteractive
# Install apt dependencies
RUN apt-get update && apt-get install -y \
git \
gpg-agent \
python3-cairocffi \
protobuf-compiler \
python3-pil \
python3-lxml \
python3-tk \
wget
# Install gcloud and gsutil commands
# https://cloud.google.com/sdk/docs/quickstart-debian-ubuntu
RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
apt-get update -y && apt-get install google-cloud-sdk -y
# Add new user to avoid running as root
RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# Tensorflow Object Detection on Docker
These instructions are experimental.
## Building and running:
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/1.15/Dockerfile -t od .
docker run -it od
```
FROM tensorflow/tensorflow:2.2.0-gpu
ARG DEBIAN_FRONTEND=noninteractive
# Install apt dependencies
RUN apt-get update && apt-get install -y \
git \
gpg-agent \
python3-cairocffi \
protobuf-compiler \
python3-pil \
python3-lxml \
python3-tk \
wget
# Install gcloud and gsutil commands
# https://cloud.google.com/sdk/docs/quickstart-debian-ubuntu
RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
apt-get update -y && apt-get install google-cloud-sdk -y
# Add new user to avoid running as root
RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# Tensorflow Object Detection on Docker
These instructions are experimental.
## Building and running:
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/2.2/Dockerfile -t od .
docker run -it od
```
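Once inside the container, a quick sanity check might look like the following sketch (assumes the protos were compiled by the Dockerfile's `protoc` step):

```python
# Quick sanity check inside the container: TensorFlow and the compiled
# object_detection protos should both import cleanly.
import tensorflow as tf
from object_detection.protos import pipeline_pb2
from object_detection.utils import dataset_util

print(tf.__version__)  # expected: 2.2.0 for this image
config = pipeline_pb2.TrainEvalPipelineConfig()  # proto compiled by protoc
print(type(config).__name__)
```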
......@@ -52,6 +52,8 @@ EVAL_METRICS_CLASS_DICT = {
coco_evaluation.CocoKeypointEvaluator,
'coco_mask_metrics':
coco_evaluation.CocoMaskEvaluator,
'coco_panoptic_metrics':
coco_evaluation.CocoPanopticSegmentationEvaluator,
'oid_challenge_detection_metrics':
object_detection_evaluation.OpenImagesDetectionChallengeEvaluator,
'oid_challenge_segmentation_metrics':
......
......@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from absl.testing import parameterized
import numpy as np
......@@ -30,6 +31,7 @@ from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.protos import eval_pb2
from object_detection.utils import test_case
from object_detection.utils import tf_version
class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
......@@ -127,6 +129,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections(self, batch_size=1,
max_gt_boxes=None,
scale_to_absolute=False):
......@@ -155,6 +158,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections_and_masks(
self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
eval_config = eval_pb2.EvalConfig()
......@@ -185,6 +189,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections_and_resized_masks(
self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
eval_config = eval_pb2.EvalConfig()
......@@ -210,6 +215,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP'])
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_raises_error_with_unsupported_metric(self):
eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.extend(['unsupported_metric'])
......@@ -334,63 +340,67 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
dtype=np.float32)
detection_keypoints = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]],
dtype=np.float32)
detections = {
detection_fields.detection_boxes:
tf.constant(detection_boxes),
detection_fields.detection_scores:
tf.constant([[1.], [1.]]),
detection_fields.detection_classes:
tf.constant([[1], [2]]),
detection_fields.num_detections:
tf.constant([1, 1]),
detection_fields.detection_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
gt_boxes = detection_boxes
groundtruth = {
input_data_fields.groundtruth_boxes:
tf.constant(gt_boxes),
input_data_fields.groundtruth_classes:
tf.constant([[1.], [1.]]),
input_data_fields.groundtruth_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)
true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])
result = eval_util.result_dict_for_batched_example(
image, key, detections, groundtruth,
scale_to_absolute=True,
true_image_shapes=true_image_shapes,
original_image_spatial_shapes=original_image_spatial_shapes,
max_gt_boxes=tf.constant(1))
with self.test_session() as sess:
result = sess.run(result)
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
result[input_data_fields.groundtruth_boxes])
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [150., 150.], [300., 300.]]]],
result[input_data_fields.groundtruth_keypoints])
# Predictions from the model are not scaled.
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
result[detection_fields.detection_boxes])
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [75., 150.], [150., 300.]]]],
result[detection_fields.detection_keypoints])
def graph_fn():
detections = {
detection_fields.detection_boxes:
tf.constant(detection_boxes),
detection_fields.detection_scores:
tf.constant([[1.], [1.]]),
detection_fields.detection_classes:
tf.constant([[1], [2]]),
detection_fields.num_detections:
tf.constant([1, 1]),
detection_fields.detection_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
gt_boxes = detection_boxes
groundtruth = {
input_data_fields.groundtruth_boxes:
tf.constant(gt_boxes),
input_data_fields.groundtruth_classes:
tf.constant([[1.], [1.]]),
input_data_fields.groundtruth_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)
true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])
result = eval_util.result_dict_for_batched_example(
image, key, detections, groundtruth,
scale_to_absolute=True,
true_image_shapes=true_image_shapes,
original_image_spatial_shapes=original_image_spatial_shapes,
max_gt_boxes=tf.constant(1))
return (result[input_data_fields.groundtruth_boxes],
result[input_data_fields.groundtruth_keypoints],
result[detection_fields.detection_boxes],
result[detection_fields.detection_keypoints])
(gt_boxes, gt_keypoints, detection_boxes,
detection_keypoints) = self.execute_cpu(graph_fn, [])
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
gt_boxes)
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [150., 150.], [300., 300.]]]],
gt_keypoints)
# Predictions from the model are not scaled.
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
detection_boxes)
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [75., 150.], [150., 300.]]]],
detection_keypoints)
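The refactor above wraps all TF ops in a `graph_fn` closure and evaluates it with `self.execute_cpu`, which runs the same test body under TF1 graph mode and TF2 eager mode. A minimal sketch of the pattern; `ScaleBoxesTest` and its contents are illustrative, not part of this change:

```python
# Sketch of the graph_fn / execute_cpu test pattern used above.
import tensorflow.compat.v1 as tf
from object_detection.utils import test_case

class ScaleBoxesTest(test_case.TestCase):

  def test_scale(self):
    def graph_fn():
      boxes = tf.constant([[0., 0., 0.5, 0.5]])
      return boxes * 2.0  # all TF ops live inside graph_fn
    scaled = self.execute_cpu(graph_fn, [])  # [] -> no placeholder inputs
    self.assertAllClose([[0., 0., 1., 1.]], scaled)
```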
if __name__ == '__main__':
......
......@@ -134,6 +134,30 @@ flags.DEFINE_string('config_override', '',
'text proto to override pipeline_config_path.')
flags.DEFINE_boolean('write_inference_graph', False,
'If true, writes inference graph to disk.')
flags.DEFINE_string('additional_output_tensor_names', None,
'Additional Tensors to output, to be specified as a comma '
'separated list of tensor names.')
flags.DEFINE_boolean('use_side_inputs', False,
'If True, uses side inputs as well as image inputs.')
flags.DEFINE_string('side_input_shapes', None,
'If use_side_inputs is True, this explicitly sets '
'the shape of the side input tensors to a fixed size. The '
'dimensions are to be provided as a comma-separated list '
'of integers. A value of -1 can be used for unknown '
'dimensions. A `/` denotes a break, starting the shape of '
'the next side input tensor. This flag is required if '
'using side inputs.')
flags.DEFINE_string('side_input_types', None,
'If use_side_inputs is True, this explicitly sets '
'the type of the side input tensors. The '
'dimensions are to be provided as a comma-separated list '
'of types, each of `string`, `integer`, or `float`. '
'This flag is required if using side inputs.')
flags.DEFINE_string('side_input_names', None,
'If use_side_inputs is True, this explicitly sets '
'the names of the side input tensors required by the model '
'assuming the names will be a comma-separated list of '
'strings. This flag is required if using side inputs.')
tf.app.flags.mark_flag_as_required('pipeline_config_path')
tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
tf.app.flags.mark_flag_as_required('output_directory')
......@@ -152,10 +176,30 @@ def main(_):
]
else:
input_shape = None
if FLAGS.use_side_inputs:
side_input_shapes, side_input_names, side_input_types = (
exporter.parse_side_inputs(
FLAGS.side_input_shapes,
FLAGS.side_input_names,
FLAGS.side_input_types))
else:
side_input_shapes = None
side_input_names = None
side_input_types = None
if FLAGS.additional_output_tensor_names:
additional_output_tensor_names = list(
FLAGS.additional_output_tensor_names.split(','))
else:
additional_output_tensor_names = None
exporter.export_inference_graph(
FLAGS.input_type, pipeline_config, FLAGS.trained_checkpoint_prefix,
FLAGS.output_directory, input_shape=input_shape,
write_inference_graph=FLAGS.write_inference_graph)
write_inference_graph=FLAGS.write_inference_graph,
additional_output_tensor_names=additional_output_tensor_names,
use_side_inputs=FLAGS.use_side_inputs,
side_input_shapes=side_input_shapes,
side_input_names=side_input_names,
side_input_types=side_input_types)
if __name__ == '__main__':
......
......@@ -24,16 +24,19 @@ import tensorflow.compat.v1 as tf
from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.framework import types_pb2
from tensorflow.core.protobuf import saver_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from object_detection import exporter
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.builders import post_processing_builder
from object_detection.core import box_list
from object_detection.utils import tf_version
_DEFAULT_NUM_CHANNELS = 3
_DEFAULT_NUM_COORD_BOX = 4
if tf_version.is_tf1():
from tensorflow.tools.graph_transforms import TransformGraph # pylint: disable=g-import-not-at-top
def get_const_center_size_encoded_anchors(anchors):
"""Exports center-size encoded anchors as a constant tensor.
......
......@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
......@@ -32,6 +33,7 @@ from object_detection.core import model
from object_detection.protos import graph_rewriter_pb2
from object_detection.protos import pipeline_pb2
from object_detection.protos import post_processing_pb2
from object_detection.utils import tf_version
# pylint: disable=g-import-not-at-top
......@@ -72,6 +74,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, from_detection_checkpoint):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......@@ -82,6 +87,7 @@ class FakeModel(model.DetectionModel):
pass
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ExportTfliteGraphTest(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self,
......@@ -413,7 +419,7 @@ class ExportTfliteGraphTest(tf.test.TestCase):
tflite_graph_file = self._export_graph_with_postprocessing_op(
pipeline_config)
self.assertTrue(os.path.exists(tflite_graph_file))
mock_get.assert_called_once()
self.assertEqual(1, mock_get.call_count)
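Replacing `mock_get.assert_called_once()` with an explicit `call_count` check is likely a portability fix: on some older Python/mock versions `assert_called_once` did not exist, and an unknown `assert_*` attribute on a `Mock` could pass vacuously. A minimal sketch of the portable form:

```python
# Sketch: counting calls explicitly behaves the same on every mock version.
from unittest import mock

m = mock.Mock()
m('some-arg')
assert m.call_count == 1  # portable across mock versions
```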
if __name__ == '__main__':
......
......@@ -39,6 +39,54 @@ except ImportError:
freeze_graph_with_def_protos = freeze_graph.freeze_graph_with_def_protos
def parse_side_inputs(side_input_shapes_string, side_input_names_string,
side_input_types_string):
"""Parses side input flags.
Args:
side_input_shapes_string: The shape of the side input tensors, provided as a
comma-separated list of integers. A value of -1 is used for unknown
dimensions. A `/` denotes a break, starting the shape of the next side
input tensor.
side_input_names_string: The names of the side input tensors, provided as a
comma-separated list of strings.
    side_input_types_string: The types of the side input tensors, provided as
      a comma-separated list, each one of `string`, `int`, or `float`.
Returns:
side_input_shapes: A list of shapes.
side_input_names: A list of strings.
side_input_types: A list of tensorflow dtypes.
"""
if side_input_shapes_string:
side_input_shapes = []
for side_input_shape_list in side_input_shapes_string.split('/'):
side_input_shape = [
int(dim) if dim != '-1' else None
for dim in side_input_shape_list.split(',')
]
side_input_shapes.append(side_input_shape)
else:
raise ValueError('When using side_inputs, side_input_shapes must be '
'specified in the input flags.')
if side_input_names_string:
side_input_names = list(side_input_names_string.split(','))
else:
raise ValueError('When using side_inputs, side_input_names must be '
'specified in the input flags.')
if side_input_types_string:
typelookup = {'float': tf.float32, 'int': tf.int32, 'string': tf.string}
side_input_types = [
typelookup[side_input_type]
for side_input_type in side_input_types_string.split(',')
]
else:
raise ValueError('When using side_inputs, side_input_types must be '
'specified in the input flags.')
return side_input_shapes, side_input_names, side_input_types
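A short usage sketch for the parser above (assuming this module's `parse_side_inputs` is in scope; the names and shapes are illustrative):

```python
# Sketch: parsing two side inputs -- a float matrix with an unknown first
# dimension and a single-element string tensor.
shapes, names, types = parse_side_inputs(
    side_input_shapes_string='-1,4/1',
    side_input_names_string='extra_features,camera_id',
    side_input_types_string='float,string')
# shapes == [[None, 4], [1]]
# names  == ['extra_features', 'camera_id']
# types  == [tf.float32, tf.string]
```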
def rewrite_nn_resize_op(is_quantized=False):
"""Replaces a custom nearest-neighbor resize op with the Tensorflow version.
......@@ -140,6 +188,14 @@ def _image_tensor_input_placeholder(input_shape=None):
return input_tensor, input_tensor
def _side_input_tensor_placeholder(side_input_shape, side_input_name,
side_input_type):
"""Returns side input placeholder and side input tensor."""
side_input_tensor = tf.placeholder(
dtype=side_input_type, shape=side_input_shape, name=side_input_name)
return side_input_tensor, side_input_tensor
def _tf_example_input_placeholder(input_shape=None):
"""Returns input that accepts a batch of strings with tf examples.
......@@ -200,7 +256,7 @@ input_placeholder_fn_map = {
'image_tensor': _image_tensor_input_placeholder,
'encoded_image_string_tensor':
_encoded_image_string_tensor_input_placeholder,
'tf_example': _tf_example_input_placeholder,
'tf_example': _tf_example_input_placeholder
}
......@@ -312,7 +368,7 @@ def write_saved_model(saved_model_path,
Args:
saved_model_path: Path to write SavedModel.
frozen_graph_def: tf.GraphDef holding frozen graph.
inputs: The input placeholder tensor.
inputs: A tensor dictionary containing the inputs to a DetectionModel.
outputs: A tensor dictionary containing the outputs of a DetectionModel.
"""
with tf.Graph().as_default():
......@@ -322,8 +378,13 @@ def write_saved_model(saved_model_path,
builder = tf.saved_model.builder.SavedModelBuilder(saved_model_path)
tensor_info_inputs = {
'inputs': tf.saved_model.utils.build_tensor_info(inputs)}
tensor_info_inputs = {}
if isinstance(inputs, dict):
for k, v in inputs.items():
tensor_info_inputs[k] = tf.saved_model.utils.build_tensor_info(v)
else:
tensor_info_inputs['inputs'] = tf.saved_model.utils.build_tensor_info(
inputs)
tensor_info_outputs = {}
for k, v in outputs.items():
tensor_info_outputs[k] = tf.saved_model.utils.build_tensor_info(v)
......@@ -364,11 +425,11 @@ def write_graph_and_checkpoint(inference_graph_def,
def _get_outputs_from_inputs(input_tensors, detection_model,
output_collection_name):
output_collection_name, **side_inputs):
inputs = tf.cast(input_tensors, dtype=tf.float32)
preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs)
output_tensors = detection_model.predict(
preprocessed_inputs, true_image_shapes)
preprocessed_inputs, true_image_shapes, **side_inputs)
postprocessed_tensors = detection_model.postprocess(
output_tensors, true_image_shapes)
return add_output_tensor_nodes(postprocessed_tensors,
......@@ -376,32 +437,45 @@ def _get_outputs_from_inputs(input_tensors, detection_model,
def build_detection_graph(input_type, detection_model, input_shape,
output_collection_name, graph_hook_fn):
output_collection_name, graph_hook_fn,
use_side_inputs=False, side_input_shapes=None,
side_input_names=None, side_input_types=None):
"""Build the detection graph."""
if input_type not in input_placeholder_fn_map:
raise ValueError('Unknown input type: {}'.format(input_type))
placeholder_args = {}
side_inputs = {}
if input_shape is not None:
if (input_type != 'image_tensor' and
input_type != 'encoded_image_string_tensor' and
input_type != 'tf_example'):
input_type != 'tf_example' and
input_type != 'tf_sequence_example'):
raise ValueError('Can only specify input shape for `image_tensor`, '
'`encoded_image_string_tensor`, or `tf_example` '
'inputs.')
'`encoded_image_string_tensor`, `tf_example`, '
' or `tf_sequence_example` inputs.')
placeholder_args['input_shape'] = input_shape
placeholder_tensor, input_tensors = input_placeholder_fn_map[input_type](
**placeholder_args)
placeholder_tensors = {'inputs': placeholder_tensor}
if use_side_inputs:
for idx, side_input_name in enumerate(side_input_names):
side_input_placeholder, side_input = _side_input_tensor_placeholder(
side_input_shapes[idx], side_input_name, side_input_types[idx])
side_inputs[side_input_name] = side_input
placeholder_tensors[side_input_name] = side_input_placeholder
outputs = _get_outputs_from_inputs(
input_tensors=input_tensors,
detection_model=detection_model,
output_collection_name=output_collection_name)
output_collection_name=output_collection_name,
**side_inputs)
# Add global step to the graph.
slim.get_or_create_global_step()
if graph_hook_fn: graph_hook_fn()
return outputs, placeholder_tensor
return outputs, placeholder_tensors
def _export_inference_graph(input_type,
......@@ -414,7 +488,11 @@ def _export_inference_graph(input_type,
output_collection_name='inference_op',
graph_hook_fn=None,
write_inference_graph=False,
temp_checkpoint_prefix=''):
temp_checkpoint_prefix='',
use_side_inputs=False,
side_input_shapes=None,
side_input_names=None,
side_input_types=None):
"""Export helper."""
tf.gfile.MakeDirs(output_directory)
frozen_graph_path = os.path.join(output_directory,
......@@ -422,12 +500,16 @@ def _export_inference_graph(input_type,
saved_model_path = os.path.join(output_directory, 'saved_model')
model_path = os.path.join(output_directory, 'model.ckpt')
outputs, placeholder_tensor = build_detection_graph(
outputs, placeholder_tensor_dict = build_detection_graph(
input_type=input_type,
detection_model=detection_model,
input_shape=input_shape,
output_collection_name=output_collection_name,
graph_hook_fn=graph_hook_fn)
graph_hook_fn=graph_hook_fn,
use_side_inputs=use_side_inputs,
side_input_shapes=side_input_shapes,
side_input_names=side_input_names,
side_input_types=side_input_types)
profile_inference_graph(tf.get_default_graph())
saver_kwargs = {}
......@@ -464,7 +546,8 @@ def _export_inference_graph(input_type,
f.write(str(inference_graph_def))
if additional_output_tensor_names is not None:
output_node_names = ','.join(outputs.keys()+additional_output_tensor_names)
output_node_names = ','.join(list(outputs.keys())+(
additional_output_tensor_names))
else:
output_node_names = ','.join(outputs.keys())
......@@ -480,7 +563,7 @@ def _export_inference_graph(input_type,
initializer_nodes='')
write_saved_model(saved_model_path, frozen_graph_def,
placeholder_tensor, outputs)
placeholder_tensor_dict, outputs)
def export_inference_graph(input_type,
......@@ -490,7 +573,11 @@ def export_inference_graph(input_type,
input_shape=None,
output_collection_name='inference_op',
additional_output_tensor_names=None,
write_inference_graph=False):
write_inference_graph=False,
use_side_inputs=False,
side_input_shapes=None,
side_input_names=None,
side_input_types=None):
"""Exports inference graph for the model specified in the pipeline config.
Args:
......@@ -506,6 +593,13 @@ def export_inference_graph(input_type,
additional_output_tensor_names: list of additional output
tensors to include in the frozen graph.
write_inference_graph: If true, writes inference graph to disk.
use_side_inputs: If True, the model requires side_inputs.
side_input_shapes: List of shapes of the side input tensors,
required if use_side_inputs is True.
side_input_names: List of names of the side input tensors,
required if use_side_inputs is True.
side_input_types: List of types of the side input tensors,
required if use_side_inputs is True.
"""
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
......@@ -524,7 +618,11 @@ def export_inference_graph(input_type,
input_shape,
output_collection_name,
graph_hook_fn=graph_rewriter_fn,
write_inference_graph=write_inference_graph)
write_inference_graph=write_inference_graph,
use_side_inputs=use_side_inputs,
side_input_shapes=side_input_shapes,
side_input_names=side_input_names,
side_input_types=side_input_types)
pipeline_config.eval_config.use_moving_averages = False
config_util.save_pipeline_config(pipeline_config, output_directory)
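Putting the side-input plumbing together, a hedged sketch of exporting a model that takes one extra float side input of shape `[1, 128]`; the paths and the side input name are hypothetical:

```python
# Sketch: exporting an inference graph with one side input.
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection import exporter
from object_detection.protos import pipeline_pb2

pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile('/models/pipeline.config', 'r') as f:  # hypothetical path
  text_format.Merge(f.read(), pipeline_config)

exporter.export_inference_graph(
    input_type='image_tensor',
    pipeline_config=pipeline_config,
    trained_checkpoint_prefix='/models/model.ckpt-10000',  # hypothetical
    output_directory='/models/export',
    use_side_inputs=True,
    side_input_shapes=[[1, 128]],
    side_input_names=['extra_embedding'],  # hypothetical side input name
    side_input_types=[tf.float32])
```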
......