Commit 657dcda5 authored by Kaushik Shivakumar

pull latest

parents 26e24e21 e6017471
......@@ -66,6 +66,11 @@ class InputDataFields(object):
groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
groundtruth_label_weights: groundtruth label weights.
groundtruth_weights: groundtruth weight factor for bounding boxes.
groundtruth_dp_num_points: The number of DensePose sampled points for each
instance.
groundtruth_dp_part_ids: Part indices for DensePose points.
groundtruth_dp_surface_coords: Image locations and UV coordinates for
DensePose points.
num_groundtruth_boxes: number of groundtruth boxes.
is_annotated: whether an image has been labeled or not.
true_image_shapes: true shapes of images in the resized images, as resized
......@@ -108,6 +113,9 @@ class InputDataFields(object):
groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
groundtruth_label_weights = 'groundtruth_label_weights'
groundtruth_weights = 'groundtruth_weights'
groundtruth_dp_num_points = 'groundtruth_dp_num_points'
groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
groundtruth_dp_surface_coords = 'groundtruth_dp_surface_coords'
num_groundtruth_boxes = 'num_groundtruth_boxes'
is_annotated = 'is_annotated'
true_image_shape = 'true_image_shape'
......
......@@ -30,6 +30,7 @@ from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import label_map_util
from object_detection.utils import shape_utils
# pylint: disable=g-import-not-at-top
try:
......@@ -170,7 +171,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
num_additional_channels=0,
load_multiclass_scores=False,
load_context_features=False,
expand_hierarchy_labels=False):
expand_hierarchy_labels=False,
load_dense_pose=False):
"""Constructor sets keys_to_features and items_to_handlers.
Args:
......@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
account the provided hierarchy in the label_map_proto_file. For positive
classes, the labels are extended to ancestor. For negative classes,
the labels are expanded to descendants.
load_dense_pose: Whether to load DensePose annotations.
Raises:
ValueError: If `instance_mask_type` option is not one of
......@@ -371,6 +374,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
self._decode_png_instance_masks))
else:
raise ValueError('Did not recognize the `instance_mask_type` option.')
if load_dense_pose:
self.keys_to_features['image/object/densepose/num'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/part_index'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/x'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/y'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/u'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/v'] = (
tf.VarLenFeature(tf.float32))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_num_points] = (
slim_example_decoder.Tensor('image/object/densepose/num'))
self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/part_index',
'image/object/densepose/num'], self._dense_pose_part_indices))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_surface_coords] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/x', 'image/object/densepose/y',
'image/object/densepose/u', 'image/object/densepose/v',
'image/object/densepose/num'],
self._dense_pose_surface_coordinates))
if label_map_proto_file:
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
......@@ -547,6 +578,14 @@ class TfExampleDecoder(data_decoder.DataDecoder):
group_of = fields.InputDataFields.groundtruth_group_of
tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
dtype=tf.int32)
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
dtype=tf.int32)
return tensor_dict
def _reshape_keypoints(self, keys_to_tensors):
......@@ -697,6 +736,97 @@ class TfExampleDecoder(data_decoder.DataDecoder):
lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
def _dense_pose_part_indices(self, keys_to_tensors):
"""Creates a tensor that contains part indices for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 2-D int32 tensor of shape [num_instances, num_points] where each element
contains the DensePose part index (0-23). The value `num_points`
corresponds to the maximum number of sampled points across all instances
in the image. Note that instances with fewer sampled points will be padded
with zeros in the last dimension.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
part_index = keys_to_tensors['image/object/densepose/part_index']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(part_index, tf.SparseTensor):
part_index = tf.sparse_tensor_to_dense(part_index)
part_index = tf.cast(part_index, dtype=tf.int32)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_parts_tensor(instance_ind):
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
part_inds = part_index[points_range_start:points_range_end]
return shape_utils.pad_or_clip_nd(part_inds,
output_shape=[max_points_per_instance])
return tf.map_fn(pad_parts_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.int32)
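# Worked illustration (values mirror testDecodeDensePose below, not an extra
# API): if image/object/densepose/num decodes to [0, 4, 2] and
# image/object/densepose/part_index decodes to [2, 2, 3, 4, 2, 9], the
# returned tensor has shape [3, 4]:
#   [[0, 0, 0, 0],
#    [2, 2, 3, 4],
#    [2, 9, 0, 0]]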
def _dense_pose_surface_coordinates(self, keys_to_tensors):
"""Creates a tensor that contains surface coords for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D float32 tensor of shape [num_instances, num_points, 4] where each
point contains (y, x, v, u) data for each sampled DensePose point. The
(y, x) coordinate has normalized image locations for the point, and (v, u)
contains the surface coordinate (also normalized) for the part. The value
`num_points` corresponds to the maximum number of sampled points across
all instances in the image. Note that instances with fewer sampled points
will be padded with zeros in dim=1.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
dp_y = keys_to_tensors['image/object/densepose/y']
dp_x = keys_to_tensors['image/object/densepose/x']
dp_v = keys_to_tensors['image/object/densepose/v']
dp_u = keys_to_tensors['image/object/densepose/u']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(dp_y, tf.SparseTensor):
dp_y = tf.sparse_tensor_to_dense(dp_y)
if isinstance(dp_x, tf.SparseTensor):
dp_x = tf.sparse_tensor_to_dense(dp_x)
if isinstance(dp_v, tf.SparseTensor):
dp_v = tf.sparse_tensor_to_dense(dp_v)
if isinstance(dp_u, tf.SparseTensor):
dp_u = tf.sparse_tensor_to_dense(dp_u)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_surface_coordinates_tensor(instance_ind):
"""Pads DensePose surface coordinates for each instance."""
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
y = dp_y[points_range_start:points_range_end]
x = dp_x[points_range_start:points_range_end]
v = dp_v[points_range_start:points_range_end]
u = dp_u[points_range_start:points_range_end]
# Create [num_points_i, 4] tensor, where num_points_i is the number of
# sampled points for instance i.
unpadded_tensor = tf.stack([y, x, v, u], axis=1)
return shape_utils.pad_or_clip_nd(
unpadded_tensor, output_shape=[max_points_per_instance, 4])
return tf.map_fn(pad_surface_coordinates_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.float32)
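# Worked illustration (values mirror testDecodeDensePose below): with
# num = [0, 4, 2], the per-point (y, x, v, u) tuples are stacked and padded to
# [max_points_per_instance, 4], so instance 1 yields
#   [[0.9, 0.1, 0.99, 0.01],
#    [0.8, 0.2, 0.98, 0.02],
#    [0.7, 0.3, 0.97, 0.03],
#    [0.6, 0.4, 0.96, 0.04]]
# and instance 0 (zero points) is all zeros.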
def _expand_image_label_hierarchy(self, image_classes, image_confidences):
"""Expand image level labels according to the hierarchy.
......
......@@ -1096,8 +1096,8 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn, [])
self.assertTrue(
fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
tensor_dict)
def testDecodeImageLabels(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
......@@ -1116,8 +1116,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_1, [])
self.assertTrue(
fields.InputDataFields.groundtruth_image_classes in tensor_dict)
self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 2]))
......@@ -1152,8 +1151,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_2, [])
self.assertTrue(
fields.InputDataFields.groundtruth_image_classes in tensor_dict)
self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 3]))
......@@ -1345,6 +1343,93 @@ class TfExampleDecoderTest(test_case.TestCase):
expected_image_confidence,
tensor_dict[fields.InputDataFields.groundtruth_image_confidences])
def testDecodeDensePose(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg, _ = self._create_encoded_and_decoded_data(
image_tensor, 'jpeg')
bbox_ymins = [0.0, 4.0, 2.0]
bbox_xmins = [1.0, 5.0, 8.0]
bbox_ymaxs = [2.0, 6.0, 1.0]
bbox_xmaxs = [3.0, 7.0, 3.3]
densepose_num = [0, 4, 2]
densepose_part_index = [2, 2, 3, 4, 2, 9]
densepose_x = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
densepose_y = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4]
densepose_u = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
densepose_v = [0.99, 0.98, 0.97, 0.96, 0.95, 0.94]
def graph_fn():
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature(six.b('jpeg')),
'image/object/bbox/ymin':
dataset_util.float_list_feature(bbox_ymins),
'image/object/bbox/xmin':
dataset_util.float_list_feature(bbox_xmins),
'image/object/bbox/ymax':
dataset_util.float_list_feature(bbox_ymaxs),
'image/object/bbox/xmax':
dataset_util.float_list_feature(bbox_xmaxs),
'image/object/densepose/num':
dataset_util.int64_list_feature(densepose_num),
'image/object/densepose/part_index':
dataset_util.int64_list_feature(densepose_part_index),
'image/object/densepose/x':
dataset_util.float_list_feature(densepose_x),
'image/object/densepose/y':
dataset_util.float_list_feature(densepose_y),
'image/object/densepose/u':
dataset_util.float_list_feature(densepose_u),
'image/object/densepose/v':
dataset_util.float_list_feature(densepose_v),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
load_dense_pose=True)
output = example_decoder.decode(tf.convert_to_tensor(example))
dp_num_points = output[fields.InputDataFields.groundtruth_dp_num_points]
dp_part_ids = output[fields.InputDataFields.groundtruth_dp_part_ids]
dp_surface_coords = output[
fields.InputDataFields.groundtruth_dp_surface_coords]
return dp_num_points, dp_part_ids, dp_surface_coords
dp_num_points, dp_part_ids, dp_surface_coords = self.execute_cpu(
graph_fn, [])
expected_dp_num_points = [0, 4, 2]
expected_dp_part_ids = [
[0, 0, 0, 0],
[2, 2, 3, 4],
[2, 9, 0, 0]
]
expected_dp_surface_coords = np.array(
[
# Instance 0 (no points).
[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
# Instance 1 (4 points).
[[0.9, 0.1, 0.99, 0.01],
[0.8, 0.2, 0.98, 0.02],
[0.7, 0.3, 0.97, 0.03],
[0.6, 0.4, 0.96, 0.04]],
# Instance 2 (2 points).
[[0.5, 0.5, 0.95, 0.05],
[0.4, 0.6, 0.94, 0.06],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
], dtype=np.float32)
self.assertAllEqual(dp_num_points, expected_dp_num_points)
self.assertAllEqual(dp_part_ids, expected_dp_part_ids)
self.assertAllClose(dp_surface_coords, expected_dp_surface_coords)
if __name__ == '__main__':
tf.test.main()
......@@ -43,6 +43,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import copy
import datetime
import io
......@@ -51,62 +52,11 @@ import json
import os
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import PIL.Image
import six
import tensorflow as tf
from apache_beam import runners
flags.DEFINE_string('input_tfrecord', None, 'TFRecord containing images in '
'tf.Example format for object detection, with bounding'
'boxes and contextual feature embeddings.')
flags.DEFINE_string('output_tfrecord', None,
'TFRecord containing images in tf.Example format, with '
'added contextual memory banks.')
flags.DEFINE_string('sequence_key', None, 'Key to use when grouping sequences: '
'so far supports `image/seq_id` and `image/location`.')
flags.DEFINE_string('time_horizon', None, 'What time horizon to use when '
'splitting the data, if any. Options are: `year`, `month`,'
' `week`, `day `, `hour`, `minute`, `None`.')
flags.DEFINE_integer('subsample_context_features_rate', 0, 'Whether to '
'subsample the context_features, and if so how many to '
'sample. If the rate is set to X, it will sample context '
'from 1 out of every X images. Default is sampling from '
'every image, which is X=0.')
flags.DEFINE_boolean('reduce_image_size', True, 'downsamples images to'
'have longest side max_image_dimension, maintaining aspect'
' ratio')
flags.DEFINE_integer('max_image_dimension', 1024, 'sets max image dimension')
flags.DEFINE_boolean('add_context_features', True, 'adds a memory bank of'
'embeddings to each clip')
flags.DEFINE_boolean('sorted_image_ids', True, 'whether the image source_ids '
'are sortable to deal with date_captured tie-breaks')
flags.DEFINE_string('image_ids_to_keep', 'All', 'path to .json list of image'
'ids to keep, used for ground truth eval creation')
flags.DEFINE_boolean('keep_context_features_image_id_list', False, 'Whether or '
'not to keep a list of the image_ids corresponding to the '
'memory bank')
flags.DEFINE_boolean('keep_only_positives', False, 'Whether or not to '
'keep only positive boxes based on score')
flags.DEFINE_boolean('keep_only_positives_gt', False, 'Whether or not to '
'keep only positive boxes based on gt class')
flags.DEFINE_float('context_features_score_threshold', 0.7, 'What score '
'threshold to use for boxes in context_features')
flags.DEFINE_integer('max_num_elements_in_context_features', 2000, 'Sets max '
'num elements per memory bank')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
flags.DEFINE_string('output_type', 'tf_sequence_example', 'Output type, one of '
'`tf_example`, `tf_sequence_example`')
flags.DEFINE_integer('max_clip_length', None, 'Max length for sequence '
'example outputs.')
FLAGS = flags.FLAGS
DEFAULT_FEATURE_LENGTH = 2057
import tensorflow.compat.v1 as tf
class ReKeyDataFn(beam.DoFn):
......@@ -406,7 +356,8 @@ class GenerateContextFn(beam.DoFn):
keep_only_positives_gt=False,
max_num_elements_in_context_features=5000,
pad_context_features=False,
output_type='tf_example', max_clip_length=None):
output_type='tf_example', max_clip_length=None,
context_feature_length=2057):
"""Initialization function.
Args:
......@@ -432,6 +383,8 @@ class GenerateContextFn(beam.DoFn):
output_type: What type of output, tf_example or tf_sequence_example
max_clip_length: The maximum length of a sequence example, before
splitting it into multiple examples.
context_feature_length: The length of the context feature embeddings
stored in the input data.
"""
self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter(
......@@ -456,6 +409,7 @@ class GenerateContextFn(beam.DoFn):
self._context_features_score_threshold = context_features_score_threshold
self._max_num_elements_in_context_features = (
max_num_elements_in_context_features)
self._context_feature_length = context_feature_length
self._images_kept = beam.metrics.Metrics.counter(
'sequence_data_generation', 'images_kept')
......@@ -506,9 +460,9 @@ class GenerateContextFn(beam.DoFn):
context_features_image_id_list.append(example_image_id)
if not example_embedding:
example_embedding.append(np.zeros(DEFAULT_FEATURE_LENGTH))
example_embedding.append(np.zeros(self._context_feature_length))
feature_length = DEFAULT_FEATURE_LENGTH
feature_length = self._context_feature_length
# If the example_list is not empty and image/embedding_length is in the
# feature dict, feature_length will be assigned to that. Otherwise, it will
......@@ -703,7 +657,8 @@ class GenerateContextFn(beam.DoFn):
return list_of_examples
def construct_pipeline(input_tfrecord,
def construct_pipeline(pipeline,
input_tfrecord,
output_tfrecord,
sequence_key,
time_horizon=None,
......@@ -720,10 +675,12 @@ def construct_pipeline(input_tfrecord,
max_num_elements_in_context_features=5000,
num_shards=0,
output_type='tf_example',
max_clip_length=None):
max_clip_length=None,
context_feature_length=2057):
"""Returns a beam pipeline to run object detection inference.
Args:
pipeline: Initialized beam pipeline.
input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
......@@ -755,91 +712,224 @@ def construct_pipeline(input_tfrecord,
output_type: What type of output, tf_example or tf_sequence_example
max_clip_length: The maximum length of a sequence example, before
splitting it into multiple examples.
context_feature_length: The length of the context feature embeddings stored
in the input data.
"""
def pipeline(root):
if output_type == 'tf_example':
coder = beam.coders.ProtoCoder(tf.train.Example)
elif output_type == 'tf_sequence_example':
coder = beam.coders.ProtoCoder(tf.train.SequenceExample)
else:
raise ValueError('Unsupported output type.')
input_collection = (
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
rekey_collection = input_collection | 'RekeyExamples' >> beam.ParDo(
ReKeyDataFn(sequence_key, time_horizon,
reduce_image_size, max_image_dimension))
grouped_collection = (
rekey_collection | 'GroupBySequenceKey' >> beam.GroupByKey())
grouped_collection = (
grouped_collection | 'ReshuffleGroups' >> beam.Reshuffle())
ordered_collection = (
grouped_collection | 'OrderByFrameNumber' >> beam.ParDo(
SortGroupedDataFn(sequence_key, sorted_image_ids,
max_num_elements_in_context_features)))
ordered_collection = (
ordered_collection | 'ReshuffleSortedGroups' >> beam.Reshuffle())
output_collection = (
ordered_collection | 'AddContextToExamples' >> beam.ParDo(
GenerateContextFn(
sequence_key, add_context_features, image_ids_to_keep,
keep_context_features_image_id_list=(
keep_context_features_image_id_list),
subsample_context_features_rate=subsample_context_features_rate,
keep_only_positives=keep_only_positives,
keep_only_positives_gt=keep_only_positives_gt,
context_features_score_threshold=(
context_features_score_threshold),
max_num_elements_in_context_features=(
max_num_elements_in_context_features),
output_type=output_type,
max_clip_length=max_clip_length)))
output_collection = (
output_collection | 'ReshuffleExamples' >> beam.Reshuffle())
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=coder)
return pipeline
def main(_):
"""Runs the Beam pipeline that builds context features.
if output_type == 'tf_example':
coder = beam.coders.ProtoCoder(tf.train.Example)
elif output_type == 'tf_sequence_example':
coder = beam.coders.ProtoCoder(tf.train.SequenceExample)
else:
raise ValueError('Unsupported output type.')
input_collection = (
pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
rekey_collection = input_collection | 'RekeyExamples' >> beam.ParDo(
ReKeyDataFn(sequence_key, time_horizon,
reduce_image_size, max_image_dimension))
grouped_collection = (
rekey_collection | 'GroupBySequenceKey' >> beam.GroupByKey())
grouped_collection = (
grouped_collection | 'ReshuffleGroups' >> beam.Reshuffle())
ordered_collection = (
grouped_collection | 'OrderByFrameNumber' >> beam.ParDo(
SortGroupedDataFn(sequence_key, sorted_image_ids,
max_num_elements_in_context_features)))
ordered_collection = (
ordered_collection | 'ReshuffleSortedGroups' >> beam.Reshuffle())
output_collection = (
ordered_collection | 'AddContextToExamples' >> beam.ParDo(
GenerateContextFn(
sequence_key, add_context_features, image_ids_to_keep,
keep_context_features_image_id_list=(
keep_context_features_image_id_list),
subsample_context_features_rate=subsample_context_features_rate,
keep_only_positives=keep_only_positives,
keep_only_positives_gt=keep_only_positives_gt,
context_features_score_threshold=(
context_features_score_threshold),
max_num_elements_in_context_features=(
max_num_elements_in_context_features),
output_type=output_type,
max_clip_length=max_clip_length,
context_feature_length=context_feature_length)))
output_collection = (
output_collection | 'ReshuffleExamples' >> beam.Reshuffle())
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=coder)
def parse_args(argv):
"""Command-line argument parser.
Args:
_: unused
argv: command line arguments
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
# must create before flags are used
runner = runners.DirectRunner()
parser = argparse.ArgumentParser()
parser.add_argument(
'--input_tfrecord',
dest='input_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format for object '
'detection, with bounding boxes and contextual feature embeddings.')
parser.add_argument(
'--output_tfrecord',
dest='output_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format, with added '
'contextual memory banks.')
parser.add_argument(
'--sequence_key',
dest='sequence_key',
default='image/location',
help='Key to use when grouping sequences: so far supports `image/seq_id` '
'and `image/location`.')
parser.add_argument(
'--context_feature_length',
dest='context_feature_length',
default=2057,
help='The length of the context feature embeddings stored in the input '
'data.')
parser.add_argument(
'--time_horizon',
dest='time_horizon',
default=None,
help='What time horizon to use when splitting the data, if any. Options '
'are: `year`, `month`, `week`, `day `, `hour`, `minute`, `None`.')
parser.add_argument(
'--subsample_context_features_rate',
dest='subsample_context_features_rate',
default=0,
help='Whether to subsample the context_features, and if so how many to '
'sample. If the rate is set to X, it will sample context from 1 out of '
'every X images. Default is sampling from every image, which is X=0.')
parser.add_argument(
'--reduce_image_size',
dest='reduce_image_size',
default=True,
help='Downsamples images to have longest side max_image_dimension, '
'maintaining aspect ratio.')
parser.add_argument(
'--max_image_dimension',
dest='max_image_dimension',
default=1024,
help='Sets max image dimension for resizing.')
parser.add_argument(
'--add_context_features',
dest='add_context_features',
default=True,
help='Adds a memory bank of embeddings to each clip')
parser.add_argument(
'--sorted_image_ids',
dest='sorted_image_ids',
default=True,
help='Whether the image source_ids are sortable to deal with '
'date_captured tie-breaks.')
parser.add_argument(
'--image_ids_to_keep',
dest='image_ids_to_keep',
default='All',
help='Path to .json list of image ids to keep, used for ground truth '
'eval creation.')
parser.add_argument(
'--keep_context_features_image_id_list',
dest='keep_context_features_image_id_list',
default=False,
help='Whether or not to keep a list of the image_ids corresponding to '
'the memory bank.')
parser.add_argument(
'--keep_only_positives',
dest='keep_only_positives',
default=False,
help='Whether or not to keep only positive boxes based on score.')
parser.add_argument(
'--context_features_score_threshold',
dest='context_features_score_threshold',
default=0.7,
help='What score threshold to use for boxes in context_features, when '
'`keep_only_positives` is set to `True`.')
parser.add_argument(
'--keep_only_positives_gt',
dest='keep_only_positives_gt',
default=False,
help='Whether or not to keep only positive boxes based on gt class.')
parser.add_argument(
'--max_num_elements_in_context_features',
dest='max_num_elements_in_context_features',
default=2000,
help='Sets max number of context feature elements per memory bank. '
'If the number of images in the context group is greater than '
'`max_num_elements_in_context_features`, the context group will be split.'
)
parser.add_argument(
'--output_type',
dest='output_type',
default='tf_example',
help='Output type, one of `tf_example`, `tf_sequence_example`.')
parser.add_argument(
'--max_clip_length',
dest='max_clip_length',
default=None,
help='Max length for sequence example outputs.')
parser.add_argument(
'--num_shards',
dest='num_shards',
default=0,
help='Number of output shards.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference.
dirname = os.path.dirname(FLAGS.output_tfrecord)
Args:
argv: Command line arguments.
save_main_session: Whether to save the main session.
"""
args, pipeline_args = parse_args(argv)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(args.output_tfrecord)
tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.input_tfrecord,
FLAGS.output_tfrecord,
FLAGS.sequence_key,
FLAGS.time_horizon,
FLAGS.subsample_context_features_rate,
FLAGS.reduce_image_size,
FLAGS.max_image_dimension,
FLAGS.add_context_features,
FLAGS.sorted_image_ids,
FLAGS.image_ids_to_keep,
FLAGS.keep_context_features_image_id_list,
FLAGS.keep_only_positives,
FLAGS.context_features_score_threshold,
FLAGS.keep_only_positives_gt,
FLAGS.max_num_elements_in_context_features,
FLAGS.num_shards,
FLAGS.output_type,
FLAGS.max_clip_length))
p = beam.Pipeline(options=pipeline_options)
construct_pipeline(
p,
args.input_tfrecord,
args.output_tfrecord,
args.sequence_key,
args.time_horizon,
args.subsample_context_features_rate,
args.reduce_image_size,
args.max_image_dimension,
args.add_context_features,
args.sorted_image_ids,
args.image_ids_to_keep,
args.keep_context_features_image_id_list,
args.keep_only_positives,
args.context_features_score_threshold,
args.keep_only_positives_gt,
args.max_num_elements_in_context_features,
args.output_type,
args.max_clip_length,
args.context_feature_length)
p.run()
if __name__ == '__main__':
flags.mark_flags_as_required([
'input_tfrecord',
'output_tfrecord'
])
app.run(main)
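A hypothetical example invocation of the refactored script (paths are placeholders; the flags match parse_args above, and unrecognized arguments such as --runner are passed through to Beam's PipelineOptions):

python add_context_to_examples.py \
  --input_tfrecord=/path/to/embedded_examples.tfrecord \
  --output_tfrecord=/path/to/examples_with_context.tfrecord \
  --sequence_key=image/seq_id \
  --runner=DirectRunner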
......@@ -22,13 +22,13 @@ import datetime
import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import add_context_to_examples
from object_detection.utils import tf_version
from apache_beam import runners
@contextlib.contextmanager
......@@ -200,7 +200,7 @@ class GenerateContextDataTest(tf.test.TestCase):
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
def assert_expected_key(self, key):
self.assertAllEqual(key, '01')
self.assertAllEqual(key, b'01')
def assert_sorted(self, example_collection):
example_list = list(example_collection)
......@@ -329,19 +329,22 @@ class GenerateContextDataTest(tf.test.TestCase):
with InMemoryTFRecord(
[self._create_first_tf_example(),
self._create_second_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
sequence_key = six.ensure_binary('image/seq_id')
max_num_elements = 10
num_shards = 1
pipeline = add_context_to_examples.construct_pipeline(
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
add_context_to_examples.construct_pipeline(
p,
input_tfrecord,
output_tfrecord,
sequence_key,
max_num_elements_in_context_features=max_num_elements,
num_shards=num_shards)
runner.run(pipeline)
p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
......@@ -355,20 +358,23 @@ class GenerateContextDataTest(tf.test.TestCase):
with InMemoryTFRecord(
[self._create_first_tf_example(),
self._create_second_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
sequence_key = six.ensure_binary('image/seq_id')
max_num_elements = 10
num_shards = 1
pipeline = add_context_to_examples.construct_pipeline(
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
add_context_to_examples.construct_pipeline(
p,
input_tfrecord,
output_tfrecord,
sequence_key,
max_num_elements_in_context_features=max_num_elements,
num_shards=num_shards,
output_type='tf_sequence_example')
runner.run(pipeline)
p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(
......
......@@ -33,32 +33,19 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import hashlib
import io
import json
import logging
import os
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import PIL.Image
import tensorflow.compat.v1 as tf
from apache_beam import runners
from object_detection.utils import dataset_util
flags.DEFINE_string('image_directory', None, 'Directory where images are '
'stored')
flags.DEFINE_string('output_tfrecord_prefix', None,
'TFRecord containing images in tf.Example format.')
flags.DEFINE_string('input_annotations_file', None, 'Path to Coco-CameraTraps'
'style annotations file')
flags.DEFINE_integer('num_images_per_shard',
200,
'The number of images to be stored in each shard.')
FLAGS = flags.FLAGS
class ParseImage(beam.DoFn):
"""A DoFn that parses a COCO-CameraTraps json and emits TFRecords."""
......@@ -243,13 +230,14 @@ class ParseImage(beam.DoFn):
return [(example)]
def _load_json_data(data_file):
def load_json_data(data_file):
with tf.io.gfile.GFile(data_file, 'r') as fid:
data_dict = json.load(fid)
return data_dict
def create_pipeline(image_directory,
def create_pipeline(pipeline,
image_directory,
input_annotations_file,
output_tfrecord_prefix=None,
num_images_per_shard=200,
......@@ -257,68 +245,97 @@ def create_pipeline(image_directory,
"""Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.
Args:
pipeline: Initialized beam pipeline.
image_directory: Path to image directory
input_annotations_file: Path to a coco-cameratraps annotation file
output_tfrecord_prefix: Absolute path for tfrecord outputs. Final files will
be named {output_tfrecord_prefix}@N.
num_images_per_shard: The number of images to store in each shard
keep_bboxes: Whether to keep any bounding boxes that exist in the json file
Returns:
A Beam pipeline.
"""
logging.info('Reading data from COCO-CameraTraps Dataset.')
data = _load_json_data(input_annotations_file)
data = load_json_data(input_annotations_file)
num_shards = int(np.ceil(float(len(data['images']))/num_images_per_shard))
def pipeline(root):
"""Builds beam pipeline."""
image_examples = (
pipeline | ('CreateCollections') >> beam.Create(
[im['id'] for im in data['images']])
| ('ParseImage') >> beam.ParDo(ParseImage(
image_directory, data['images'], data['annotations'],
data['categories'], keep_bboxes=keep_bboxes)))
_ = (image_examples
| ('Reshuffle') >> beam.Reshuffle()
| ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord_prefix,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example)))
image_examples = (
root
| ('CreateCollections') >> beam.Create(
[im['id'] for im in data['images']])
| ('ParseImage') >> beam.ParDo(ParseImage(
image_directory, data['images'], data['annotations'],
data['categories'], keep_bboxes=keep_bboxes)))
_ = (image_examples
| ('Reshuffle') >> beam.Reshuffle()
| ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord_prefix,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example)))
return pipeline
def parse_args(argv):
"""Command-line argument parser.
def main(_):
Args:
argv: command line arguments
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--image_directory',
dest='image_directory',
required=True,
help='Path to the directory where the images are stored.')
parser.add_argument(
'--output_tfrecord_prefix',
dest='output_tfrecord_prefix',
required=True,
help='Path and prefix to store TFRecords containing images in tf.Example '
'format.')
parser.add_argument(
'--input_annotations_file',
dest='input_annotations_file',
required=True,
help='Path to Coco-CameraTraps style annotations file.')
parser.add_argument(
'--num_images_per_shard',
dest='num_images_per_shard',
default=200,
help='The number of images to be stored in each output shard.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
argv: Command line arguments.
save_main_session: Whether to save the main session.
"""
args, pipeline_args = parse_args(argv)
# must create before flags are used
runner = runners.DirectRunner()
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(FLAGS.output_tfrecord_prefix)
dirname = os.path.dirname(args.output_tfrecord_prefix)
tf.io.gfile.makedirs(dirname)
runner.run(
create_pipeline(
image_directory=FLAGS.image_directory,
input_annotations_file=FLAGS.input_annotations_file,
output_tfrecord_prefix=FLAGS.output_tfrecord_prefix,
num_images_per_shard=FLAGS.num_images_per_shard))
p = beam.Pipeline(options=pipeline_options)
create_pipeline(
pipeline=p,
image_directory=args.image_directory,
input_annotations_file=args.input_annotations_file,
output_tfrecord_prefix=args.output_tfrecord_prefix,
num_images_per_shard=args.num_images_per_shard)
p.run()
if __name__ == '__main__':
flags.mark_flags_as_required([
'image_directory',
'input_annotations_file',
'output_tfrecord_prefix'
])
app.run(main)
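A hypothetical example invocation (paths are placeholders; the flags match parse_args above, and extra arguments such as --runner are passed through to Beam's PipelineOptions):

python create_cococameratraps_tfexample_main.py \
  --image_directory=/path/to/images \
  --input_annotations_file=/path/to/annotations.json \
  --output_tfrecord_prefix=/path/to/output/tfrecords \
  --num_images_per_shard=200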
......@@ -21,13 +21,14 @@ import json
import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
from PIL import Image
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
from object_detection.utils import tf_version
from apache_beam import runners
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
......@@ -95,13 +96,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, ['animal'])
.bytes_list.value, [b'animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, ['animal'])
.bytes_list.value, [b'animal'])
# Check other essential attributes.
self.assertAllEqual(
......@@ -112,7 +113,7 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
['im_0'])
[b'im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
......@@ -134,13 +135,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, ['animal'])
.bytes_list.value, [b'animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, ['animal'])
.bytes_list.value, [b'animal'])
# Check other essential attributes.
self.assertAllEqual(
......@@ -151,21 +152,23 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
['im_0'])
[b'im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_beam_pipeline(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline = create_cococameratraps_tfexample_main.create_pipeline(
temp_dir, json_path,
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
create_cococameratraps_tfexample_main.create_pipeline(
p, temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord)
runner.run(pipeline)
p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
......@@ -176,17 +179,19 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
actual_output[0]))
def test_beam_pipeline_bbox(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames, keep_bboxes=True)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline = create_cococameratraps_tfexample_main.create_pipeline(
temp_dir, json_path,
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
create_cococameratraps_tfexample_main.create_pipeline(
p, temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord,
keep_bboxes=True)
runner.run(pipeline)
p.run()
filenames = tf.io.gfile.glob(output_tfrecord+'-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
......
......@@ -45,26 +45,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import threading
from absl import app
from absl import flags
import apache_beam as beam
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('detection_input_tfrecord', None, 'TFRecord containing '
'images in tf.Example format for object detection.')
flags.DEFINE_string('detection_output_tfrecord', None,
'TFRecord containing detections in tf.Example format.')
flags.DEFINE_string('detection_model_dir', None, 'Path to directory containing'
'an object detection SavedModel.')
flags.DEFINE_float('confidence_threshold', 0.9,
'Min confidence to keep bounding boxes')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateDetectionDataFn(beam.DoFn):
......@@ -205,58 +191,103 @@ class GenerateDetectionDataFn(beam.DoFn):
return [example]
def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
confidence_threshold, num_shards):
"""Returns a Beam pipeline to run object detection inference.
Args:
pipeline: Initialized beam pipeline.
input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
model_dir: Path to `saved_model` to use for inference.
confidence_threshold: Threshold to use when keeping detection results.
num_shards: The number of output shards.
"""
input_collection = (
pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'RunInference' >> beam.ParDo(
GenerateDetectionDataFn(model_dir, confidence_threshold))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
def parse_args(argv):
"""Command-line argument parser.
Args:
argv: command line arguments
Returns:
pipeline: A Beam pipeline.
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
def pipeline(root):
input_collection = (
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'RunInference' >> beam.ParDo(
GenerateDetectionDataFn(model_dir, confidence_threshold))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
return pipeline
def main(_):
parser = argparse.ArgumentParser()
parser.add_argument(
'--detection_input_tfrecord',
dest='detection_input_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format for object '
'detection.')
parser.add_argument(
'--detection_output_tfrecord',
dest='detection_output_tfrecord',
required=True,
help='TFRecord containing detections in tf.Example format.')
parser.add_argument(
'--detection_model_dir',
dest='detection_model_dir',
required=True,
help='Path to directory containing an object detection SavedModel.')
parser.add_argument(
'--confidence_threshold',
dest='confidence_threshold',
default=0.9,
help='Min confidence to keep bounding boxes.')
parser.add_argument(
'--num_shards',
dest='num_shards',
default=0,
help='Number of output shards.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
argv: Command line arguments.
save_main_session: Whether to save the main session.
"""
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.detection_output_tfrecord)
args, pipeline_args = parse_args(argv)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(args.detection_output_tfrecord)
tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.detection_input_tfrecord,
FLAGS.detection_output_tfrecord,
FLAGS.detection_model_dir,
FLAGS.confidence_threshold,
FLAGS.num_shards))
p = beam.Pipeline(options=pipeline_options)
construct_pipeline(
p,
args.detection_input_tfrecord,
args.detection_output_tfrecord,
args.detection_model_dir,
args.confidence_threshold,
args.num_shards)
p.run()
if __name__ == '__main__':
flags.mark_flags_as_required([
'detection_input_tfrecord',
'detection_output_tfrecord',
'detection_model_dir'
])
app.run(main)
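A hypothetical example invocation (paths are placeholders; the flags match parse_args above):

python generate_detection_data.py \
  --detection_input_tfrecord=/path/to/input.tfrecord \
  --detection_output_tfrecord=/path/to/detections.tfrecord \
  --detection_model_dir=/path/to/saved_model_dir \
  --confidence_threshold=0.9 \
  --runner=DirectRunner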
......@@ -22,6 +22,7 @@ import contextlib
import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
......@@ -32,7 +33,6 @@ from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_detection_data
from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
......@@ -67,6 +67,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......@@ -243,16 +246,18 @@ class GenerateDetectionDataTest(tf.test.TestCase):
def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model()
confidence_threshold = 0.8
num_shards = 1
pipeline = generate_detection_data.construct_pipeline(
input_tfrecord, output_tfrecord, saved_model_path,
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
generate_detection_data.construct_pipeline(
p, input_tfrecord, output_tfrecord, saved_model_path,
confidence_threshold, num_shards)
runner.run(pipeline)
p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
......
......@@ -47,34 +47,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import datetime
import os
import threading
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('embedding_input_tfrecord', None, 'TFRecord containing'
'images in tf.Example format for object detection.')
flags.DEFINE_string('embedding_output_tfrecord', None,
'TFRecord containing embeddings in tf.Example format.')
flags.DEFINE_string('embedding_model_dir', None, 'Path to directory containing'
'an object detection SavedModel with'
'detection_box_classifier_features in the output.')
flags.DEFINE_integer('top_k_embedding_count', 1,
'The number of top k embeddings to add to the memory bank.'
)
flags.DEFINE_integer('bottom_k_embedding_count', 0,
'The number of bottom k embeddings to add to the memory '
'bank.')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateEmbeddingDataFn(beam.DoFn):
......@@ -321,12 +304,13 @@ class GenerateEmbeddingDataFn(beam.DoFn):
return [example]
def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
top_k_embedding_count, bottom_k_embedding_count,
num_shards):
"""Returns a beam pipeline to run object detection inference.
Args:
pipeline: Initialized beam pipeline.
input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
......@@ -335,44 +319,96 @@ def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
bottom_k_embedding_count: The number of low-confidence embeddings to store.
num_shards: The number of output shards.
"""
def pipeline(root):
input_collection = (
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
bottom_k_embedding_count))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
return pipeline
def main(_):
input_collection = (
pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
bottom_k_embedding_count))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
def parse_args(argv):
"""Command-line argument parser.
Args:
argv: command line arguments
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--embedding_input_tfrecord',
dest='embedding_input_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format for object '
'detection.')
parser.add_argument(
'--embedding_output_tfrecord',
dest='embedding_output_tfrecord',
required=True,
help='TFRecord containing embeddings in tf.Example format.')
parser.add_argument(
'--embedding_model_dir',
dest='embedding_model_dir',
required=True,
help='Path to directory containing an object detection SavedModel with '
'detection_box_classifier_features in the output.')
parser.add_argument(
'--top_k_embedding_count',
dest='top_k_embedding_count',
default=1,
help='The number of top k embeddings to add to the memory bank.')
parser.add_argument(
'--bottom_k_embedding_count',
dest='bottom_k_embedding_count',
default=0,
help='The number of bottom k embeddings to add to the memory bank.')
parser.add_argument(
'--num_shards',
dest='num_shards',
default=0,
help='Number of output shards.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
argv: Command line arguments.
save_main_session: Whether to save the main session.
"""
# must create before flags are used
runner = runners.DirectRunner()
args, pipeline_args = parse_args(argv)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(FLAGS.embedding_output_tfrecord)
dirname = os.path.dirname(args.embedding_output_tfrecord)
tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.embedding_input_tfrecord,
FLAGS.embedding_output_tfrecord,
FLAGS.embedding_model_dir, FLAGS.top_k_embedding_count,
FLAGS.bottom_k_embedding_count, FLAGS.num_shards))
p = beam.Pipeline(options=pipeline_options)
construct_pipeline(
p,
args.embedding_input_tfrecord,
args.embedding_output_tfrecord,
args.embedding_model_dir,
args.top_k_embedding_count,
args.bottom_k_embedding_count,
args.num_shards)
p.run()
if __name__ == '__main__':
flags.mark_flags_as_required([
'embedding_input_tfrecord',
'embedding_output_tfrecord',
'embedding_model_dir'
])
app.run(main)
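A hypothetical example invocation (paths are placeholders; the flags match parse_args above):

python generate_embedding_data.py \
  --embedding_input_tfrecord=/path/to/detections.tfrecord \
  --embedding_output_tfrecord=/path/to/embeddings.tfrecord \
  --embedding_model_dir=/path/to/saved_model_dir \
  --top_k_embedding_count=1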
......@@ -21,6 +21,7 @@ import contextlib
import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
......@@ -30,7 +31,7 @@ from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_embedding_data
from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
......@@ -73,6 +74,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......@@ -236,13 +240,13 @@ class GenerateEmbeddingData(tf.test.TestCase):
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, ['hyena'])
.bytes_list.value, [b'hyena'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, ['hyena'])
.bytes_list.value, [b'hyena'])
# Check other essential attributes.
self.assertAllEqual(
......@@ -251,7 +255,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
example.features.feature['image/width'].int64_list.value, [600])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
['image_id'])
[b'image_id'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
......@@ -268,7 +272,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
.int64_list.value, [5])
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/text']
.bytes_list.value, ['hyena'])
.bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assert_expected_example(output_example)
......@@ -304,24 +308,26 @@ class GenerateEmbeddingData(tf.test.TestCase):
.feature['image/object/class/label'].int64_list.value, [5])
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/text'].bytes_list.value, ['hyena'])
.feature['image/object/class/text'].bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assert_expected_example(output_example, botk=True)
def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model()
top_k_embedding_count = 1
bottom_k_embedding_count = 0
num_shards = 1
pipeline = generate_embedding_data.construct_pipeline(
input_tfrecord, output_tfrecord, saved_model_path,
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
generate_embedding_data.construct_pipeline(
p, input_tfrecord, output_tfrecord, saved_model_path,
top_k_embedding_count, bottom_k_embedding_count, num_shards)
runner.run(pipeline)
p.run()
filenames = tf.io.gfile.glob(
output_tfrecord + '-?????-of-?????')
actual_output = []
......
......@@ -14,6 +14,9 @@
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
This tool supports data generation for object detection (boxes, masks),
keypoint detection, and DensePose.
Please note that this tool creates sharded output files.
Example usage:
......@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
'Training annotations JSON file.')
tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
'Validation annotations JSON file.')
# DensePose is only available for COCO 2014.
tf.flags.DEFINE_string('train_densepose_annotations_file', '',
'Training annotations JSON file for DensePose.')
tf.flags.DEFINE_string('val_densepose_annotations_file', '',
'Validation annotations JSON file for DensePose.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
# Whether to only produce images/annotations for the person class (for the
# keypoint / DensePose tasks).
tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
'remove all annotations for non-person objects.')
tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
'remove all examples that do not contain a person.')
FLAGS = flags.FLAGS
......@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [
b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
]
_COCO_PART_NAMES = [
b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
b'left_face',
]
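# COCO DensePose part indices (dp_I) are 1-based; _DP_PART_ID_OFFSET is
# subtracted below so that the part ids written to the TFRecords are 0-based.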
_DP_PART_ID_OFFSET = 1
def clip_to_unit(x):
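"""Clips a value to the unit interval [0.0, 1.0]."""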
return min(max(x, 0.0), 1.0)
def create_tf_example(image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=None):
keypoint_annotations_dict=None,
densepose_annotations_dict=None,
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Converts image and annotations to a tf.Example proto.
Args:
......@@ -108,10 +142,23 @@ def create_tf_example(image,
dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
keypoint information for this person object annotation. If None, then
no keypoint annotations will be populated.
densepose_annotations_dict: A dictionary that maps from annotation_id to a
dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', u'dp_U', u'dp_V']
representing DensePose part indices and surface coordinates. For more
information see http://densepose.org/.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
Returns:
key: SHA256 hash of the image.
example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored.
num_keypoint_annotation_skipped: Number of keypoint annotations that were
skipped.
num_densepose_annotation_skipped: Number of DensePose annotations that were
skipped.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
......@@ -146,6 +193,16 @@ def create_tf_example(image,
num_annotations_skipped = 0
num_keypoint_annotation_used = 0
num_keypoint_annotation_skipped = 0
dp_part_index = []
dp_x = []
dp_y = []
dp_u = []
dp_v = []
dp_num_points = []
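# A DensePose annotation is only used if it provides all of: dp_I (part
# indices), dp_U/dp_V (surface coordinates), dp_x/dp_y (box-relative point
# locations) and the instance 'bbox'.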
densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
include_densepose = densepose_annotations_dict is not None
num_densepose_annotation_used = 0
num_densepose_annotation_skipped = 0
for object_annotations in annotations_list:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
......@@ -154,14 +211,18 @@ def create_tf_example(image,
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
continue
category_id = int(object_annotations['category_id'])
category_name = category_index[category_id]['name'].encode('utf8')
if remove_non_person_annotations and category_name != b'person':
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
is_crowd.append(object_annotations['iscrowd'])
category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
category_names.append(category_index[category_id]['name'].encode('utf8'))
category_names.append(category_name)
area.append(object_annotations['area'])
if include_masks:
......@@ -197,6 +258,40 @@ def create_tf_example(image,
keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
keypoints_name.extend(_COCO_KEYPOINT_NAMES)
num_keypoints.append(0)
if include_densepose:
annotation_id = object_annotations['id']
if (annotation_id in densepose_annotations_dict and
all(key in densepose_annotations_dict[annotation_id]
for key in densepose_keys)):
dp_annotations = densepose_annotations_dict[annotation_id]
num_densepose_annotation_used += 1
dp_num_points.append(len(dp_annotations['dp_I']))
dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
for i in dp_annotations['dp_I']])
# DensePose point locations (dp_x, dp_y) are defined on a [256, 256] grid
# relative to each instance box (i.e. absolute coordinates in the range
# [0., 256.]). The following converts the coordinates so that they are
# expressed in normalized image coordinates.
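# For example, with a box at x=64 of width 128 in a 256-pixel-wide image,
# dp_x=128. corresponds to 128./256. = 0.5 of the box width and therefore
# maps to (64 + 0.5 * 128) / 256 = 0.5 in normalized image coordinates.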
dp_x_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
dp_x_norm = [(float(x) + x_box_rel * width) / image_width
for x_box_rel in dp_x_box_rel]
dp_y_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
dp_y_norm = [(float(y) + y_box_rel * height) / image_height
for y_box_rel in dp_y_box_rel]
dp_x.extend(dp_x_norm)
dp_y.extend(dp_y_norm)
dp_u.extend(dp_annotations['dp_U'])
dp_v.extend(dp_annotations['dp_V'])
else:
dp_num_points.append(0)
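# When requested, drop the whole image if no "person" annotation remains
# after filtering; a None example signals the caller to skip this image.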
if (remove_non_person_images and
not any(name == b'person' for name in category_names)):
return (key, None, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
feature_dict = {
'image/height':
dataset_util.int64_feature(image_height),
......@@ -243,15 +338,34 @@ def create_tf_example(image,
dataset_util.bytes_list_feature(keypoints_name))
num_keypoint_annotation_skipped = (
len(keypoint_annotations_dict) - num_keypoint_annotation_used)
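# Write the DensePose features: dp_num_points holds one entry per box, while
# the remaining DensePose lists are flattened across all boxes in the image.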
if include_densepose:
feature_dict['image/object/densepose/num'] = (
dataset_util.int64_list_feature(dp_num_points))
feature_dict['image/object/densepose/part_index'] = (
dataset_util.int64_list_feature(dp_part_index))
feature_dict['image/object/densepose/x'] = (
dataset_util.float_list_feature(dp_x))
feature_dict['image/object/densepose/y'] = (
dataset_util.float_list_feature(dp_y))
feature_dict['image/object/densepose/u'] = (
dataset_util.float_list_feature(dp_u))
feature_dict['image/object/densepose/v'] = (
dataset_util.float_list_feature(dp_v))
num_densepose_annotation_skipped = (
len(densepose_annotations_dict) - num_densepose_annotation_used)
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return key, example, num_annotations_skipped, num_keypoint_annotation_skipped
return (key, example, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
output_path, include_masks,
num_shards,
keypoint_annotations_file=''):
keypoint_annotations_file='',
densepose_annotations_file='',
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Loads COCO annotation json files and converts to tf.Record format.
Args:
......@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_file: JSON file containing the person keypoint
annotations. If empty, then no person keypoint annotations will be
generated.
densepose_annotations_file: JSON file containing the DensePose annotations.
If empty, then no DensePose annotations will be generated.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
"""
with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid:
......@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
if image_id not in annotations_index:
missing_annotation_count += 1
annotations_index[image_id] = []
logging.info('%d images are missing annotations.', missing_annotation_count)
logging.info('%d images are missing annotations.',
missing_annotation_count)
keypoint_annotations_index = {}
if keypoint_annotations_file:
......@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_index[image_id] = {}
keypoint_annotations_index[image_id][annotation['id']] = annotation
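# Index DensePose annotations by image id and then by annotation id,
# mirroring the keypoint annotation index above.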
densepose_annotations_index = {}
if densepose_annotations_file:
with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
densepose_groundtruth_data = json.load(fid)
if 'annotations' in densepose_groundtruth_data:
for annotation in densepose_groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in densepose_annotations_index:
densepose_annotations_index[image_id] = {}
densepose_annotations_index[image_id][annotation['id']] = annotation
total_num_annotations_skipped = 0
total_num_keypoint_annotations_skipped = 0
total_num_densepose_annotations_skipped = 0
for idx, image in enumerate(images):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(images))
......@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_dict = {}
if image['id'] in keypoint_annotations_index:
keypoint_annotations_dict = keypoint_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped,
num_keypoint_annotations_skipped) = create_tf_example(
densepose_annotations_dict = None
if densepose_annotations_file:
densepose_annotations_dict = {}
if image['id'] in densepose_annotations_index:
densepose_annotations_dict = densepose_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped, num_keypoint_annotations_skipped,
num_densepose_annotations_skipped) = create_tf_example(
image, annotations_list, image_dir, category_index, include_masks,
keypoint_annotations_dict)
keypoint_annotations_dict, densepose_annotations_dict,
remove_non_person_annotations, remove_non_person_images)
total_num_annotations_skipped += num_annotations_skipped
total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
total_num_densepose_annotations_skipped += (
num_densepose_annotations_skipped)
shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
if tf_example:
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
logging.info('Finished writing, skipped %d annotations.',
total_num_annotations_skipped)
if keypoint_annotations_file:
logging.info('Finished writing, skipped %d keypoint annotations.',
total_num_keypoint_annotations_skipped)
if densepose_annotations_file:
logging.info('Finished writing, skipped %d DensePose annotations.',
total_num_densepose_annotations_skipped)
def main(_):
......@@ -347,20 +492,26 @@ def main(_):
train_output_path,
FLAGS.include_masks,
num_shards=100,
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file)
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file,
densepose_annotations_file=FLAGS.train_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.val_annotations_file,
FLAGS.val_image_dir,
val_output_path,
FLAGS.include_masks,
num_shards=100,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file)
num_shards=50,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file,
densepose_annotations_file=FLAGS.val_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.testdev_annotations_file,
FLAGS.test_image_dir,
testdev_output_path,
FLAGS.include_masks,
num_shards=100)
num_shards=50)
if __name__ == '__main__':
......
......@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index)
self.assertEqual(num_annotations_skipped, 0)
......@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index, include_masks=True)
self.assertEqual(num_annotations_skipped, 0)
......@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
}
(_, example, _,
num_keypoint_annotation_skipped) = create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict)
_, example, _, num_keypoint_annotation_skipped, _ = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict))
self.assertEqual(num_keypoint_annotation_skipped, 0)
self._assertProtoEqual(
......@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
example.features.feature[
'image/object/keypoint/visibility'].int64_list.value, vv)
def test_create_tf_example_with_dense_pose(self):
image_dir = self.get_temp_dir()
image_file_name = 'tmp_image.jpg'
image_data = np.random.randint(low=0, high=256, size=(256, 256, 3)).astype(
np.uint8)
save_path = os.path.join(image_dir, image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
image = {
'file_name': image_file_name,
'height': 256,
'width': 256,
'id': 11,
}
min_x, min_y = 64, 64
max_x, max_y = 128, 128
keypoints = []
num_visible_keypoints = 0
xv = []
yv = []
vv = []
for _ in range(17):
xc = min_x + int(np.random.rand()*(max_x - min_x))
yc = min_y + int(np.random.rand()*(max_y - min_y))
vis = np.random.randint(0, 3)
xv.append(xc)
yv.append(yc)
vv.append(vis)
keypoints.extend([xc, yc, vis])
num_visible_keypoints += (vis > 0)
annotations_list = [{
'area': 0.5,
'iscrowd': False,
'image_id': 11,
'bbox': [64, 64, 128, 128],
'category_id': 1,
'id': 1000
}]
num_points = 45
dp_i = np.random.randint(1, 25, (num_points,)).astype(np.float32)
dp_u = np.random.randn(num_points)
dp_v = np.random.randn(num_points)
dp_x = np.random.rand(num_points)*256.
dp_y = np.random.rand(num_points)*256.
densepose_annotations_dict = {
1000: {
'dp_I': dp_i,
'dp_U': dp_u,
'dp_V': dp_v,
'dp_x': dp_x,
'dp_y': dp_y,
'bbox': [64, 64, 128, 128],
}
}
category_index = {
1: {
'name': 'person',
'id': 1
}
}
_, example, _, _, num_densepose_annotation_skipped = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
densepose_annotations_dict=densepose_annotations_dict))
self.assertEqual(num_densepose_annotation_skipped, 0)
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[six.b(image_file_name)])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[six.b(str(image['id']))])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value,
[six.b('jpeg')])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
[six.b('person')])
self._assertProtoEqual(
example.features.feature['image/object/densepose/num'].int64_list.value,
[num_points])
self.assertAllEqual(
example.features.feature[
'image/object/densepose/part_index'].int64_list.value,
dp_i.astype(np.int64) - create_coco_tf_record._DP_PART_ID_OFFSET)
self.assertAllClose(
example.features.feature['image/object/densepose/u'].float_list.value,
dp_u)
self.assertAllClose(
example.features.feature['image/object/densepose/v'].float_list.value,
dp_v)
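# dp_x/dp_y are on the [0, 256] box-relative grid; with the box
# [x=64, y=64, w=128, h=128] in a 256x256 image, the expected normalized
# image coordinates are (64 + dp * 128 / 256) / 256.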
expected_dp_x = (64 + dp_x * 128. / 256.) / 256.
expected_dp_y = (64 + dp_y * 128. / 256.) / 256.
self.assertAllClose(
example.features.feature['image/object/densepose/x'].float_list.value,
expected_dp_x)
self.assertAllClose(
example.features.feature['image/object/densepose/y'].float_list.value,
expected_dp_y)
def test_create_sharded_tf_record(self):
tmp_dir = self.get_temp_dir()
image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
......
......@@ -288,7 +288,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75, 1.],
seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:])
self.assertAllEqual(
['cat', 'frog'],
[b'cat', b'frog'],
seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:])
self.assertAllClose(
[0.],
......@@ -332,7 +332,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75],
seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:])
self.assertAllEqual(
['cat'],
[b'cat'],
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
self.assertAllClose(
[],
......
......@@ -42,7 +42,7 @@ class OpenOutputTfrecordsTests(tf.test.TestCase):
tf_record_path = '{}-{:05d}-of-00010'.format(
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
records = list(tf.python_io.tf_record_iterator(tf_record_path))
self.assertAllEqual(records, ['test_{}'.format(idx)])
self.assertAllEqual(records, ['test_{}'.format(idx).encode('utf-8')])
if __name__ == '__main__':
......