Commit 657dcda5 authored by Kaushik Shivakumar

pull latest

parents 26e24e21 e6017471
@@ -66,6 +66,11 @@ class InputDataFields(object):
groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
groundtruth_label_weights: groundtruth label weights.
groundtruth_weights: groundtruth weight factor for bounding boxes.
groundtruth_dp_num_points: The number of DensePose sampled points for each
instance.
groundtruth_dp_part_ids: Part indices for DensePose points.
groundtruth_dp_surface_coords: Image locations and UV coordinates for
DensePose points.
num_groundtruth_boxes: number of groundtruth boxes.
is_annotated: whether an image has been labeled or not.
true_image_shapes: true shapes of images in the resized images, as resized
@@ -108,6 +113,9 @@ class InputDataFields(object):
groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
groundtruth_label_weights = 'groundtruth_label_weights'
groundtruth_weights = 'groundtruth_weights'
groundtruth_dp_num_points = 'groundtruth_dp_num_points'
groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
groundtruth_dp_surface_coords = 'groundtruth_dp_surface_coords'
num_groundtruth_boxes = 'num_groundtruth_boxes'
is_annotated = 'is_annotated'
true_image_shape = 'true_image_shape'
...
@@ -30,6 +30,7 @@ from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import label_map_util
from object_detection.utils import shape_utils
# pylint: disable=g-import-not-at-top
try:
@@ -170,7 +171,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
num_additional_channels=0,
load_multiclass_scores=False,
load_context_features=False,
expand_hierarchy_labels=False,
load_dense_pose=False):
"""Constructor sets keys_to_features and items_to_handlers. """Constructor sets keys_to_features and items_to_handlers.
Args: Args:
...@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
account the provided hierarchy in the label_map_proto_file. For positive account the provided hierarchy in the label_map_proto_file. For positive
classes, the labels are extended to ancestor. For negative classes, classes, the labels are extended to ancestor. For negative classes,
the labels are expanded to descendants. the labels are expanded to descendants.
load_dense_pose: Whether to load DensePose annotations.
Raises:
ValueError: If `instance_mask_type` option is not one of
@@ -371,6 +374,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
self._decode_png_instance_masks))
else:
raise ValueError('Did not recognize the `instance_mask_type` option.')
if load_dense_pose:
self.keys_to_features['image/object/densepose/num'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/part_index'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/x'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/y'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/u'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/v'] = (
tf.VarLenFeature(tf.float32))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_num_points] = (
slim_example_decoder.Tensor('image/object/densepose/num'))
self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/part_index',
'image/object/densepose/num'], self._dense_pose_part_indices))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_surface_coords] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/x', 'image/object/densepose/y',
'image/object/densepose/u', 'image/object/densepose/v',
'image/object/densepose/num'],
self._dense_pose_surface_coordinates))
if label_map_proto_file:
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
@@ -547,6 +578,14 @@ class TfExampleDecoder(data_decoder.DataDecoder):
group_of = fields.InputDataFields.groundtruth_group_of
tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
dtype=tf.int32)
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
dtype=tf.int32)
return tensor_dict
def _reshape_keypoints(self, keys_to_tensors):
@@ -697,6 +736,97 @@ class TfExampleDecoder(data_decoder.DataDecoder):
lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
def _dense_pose_part_indices(self, keys_to_tensors):
"""Creates a tensor that contains part indices for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 2-D int32 tensor of shape [num_instances, num_points] where each element
contains the DensePose part index (0-23). The value `num_points`
corresponds to the maximum number of sampled points across all instances
in the image. Note that instances with fewer sampled points will be padded
with zeros in the last dimension.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
part_index = keys_to_tensors['image/object/densepose/part_index']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(part_index, tf.SparseTensor):
part_index = tf.sparse_tensor_to_dense(part_index)
part_index = tf.cast(part_index, dtype=tf.int32)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_parts_tensor(instance_ind):
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
part_inds = part_index[points_range_start:points_range_end]
return shape_utils.pad_or_clip_nd(part_inds,
output_shape=[max_points_per_instance])
return tf.map_fn(pad_parts_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.int32)
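For reference, a minimal standalone sketch (not part of this commit) of the padding scheme this handler implements, using the same toy data as the new test further down; it assumes TF1-style graph execution:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

num_points = tf.constant([0, 4, 2])            # sampled points per instance
part_index = tf.constant([2, 2, 3, 4, 2, 9])   # flat part ids across instances

max_points = tf.reduce_max(num_points)
cumulative = tf.concat([[0], tf.cumsum(num_points)], axis=0)

def pad_parts(i):
  # Slice this instance's ids out of the flat tensor, then zero-pad on the
  # right up to the longest instance in the image.
  ids = part_index[cumulative[i]:cumulative[i + 1]]
  return tf.pad(ids, [[0, max_points - tf.size(ids)]])

padded = tf.map_fn(pad_parts, tf.range(tf.size(num_points)), dtype=tf.int32)
with tf.Session() as sess:
  print(sess.run(padded))  # [[0 0 0 0] [2 2 3 4] [2 9 0 0]]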
def _dense_pose_surface_coordinates(self, keys_to_tensors):
"""Creates a tensor that contains surface coords for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D float32 tensor of shape [num_instances, num_points, 4] where each
point contains (y, x, v, u) data for each sampled DensePose point. The
(y, x) coordinate has normalized image locations for the point, and (v, u)
contains the surface coordinate (also normalized) for the part. The value
`num_points` corresponds to the maximum number of sampled points across
all instances in the image. Note that instances with fewer sampled points
will be padded with zeros in dim=1.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
dp_y = keys_to_tensors['image/object/densepose/y']
dp_x = keys_to_tensors['image/object/densepose/x']
dp_v = keys_to_tensors['image/object/densepose/v']
dp_u = keys_to_tensors['image/object/densepose/u']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(dp_y, tf.SparseTensor):
dp_y = tf.sparse_tensor_to_dense(dp_y)
if isinstance(dp_x, tf.SparseTensor):
dp_x = tf.sparse_tensor_to_dense(dp_x)
if isinstance(dp_v, tf.SparseTensor):
dp_v = tf.sparse_tensor_to_dense(dp_v)
if isinstance(dp_u, tf.SparseTensor):
dp_u = tf.sparse_tensor_to_dense(dp_u)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_surface_coordinates_tensor(instance_ind):
"""Pads DensePose surface coordinates for each instance."""
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
y = dp_y[points_range_start:points_range_end]
x = dp_x[points_range_start:points_range_end]
v = dp_v[points_range_start:points_range_end]
u = dp_u[points_range_start:points_range_end]
# Create [num_points_i, 4] tensor, where num_points_i is the number of
# sampled points for instance i.
unpadded_tensor = tf.stack([y, x, v, u], axis=1)
return shape_utils.pad_or_clip_nd(
unpadded_tensor, output_shape=[max_points_per_instance, 4])
return tf.map_fn(pad_surface_coordinates_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.float32)
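As a quick NumPy analogue (an illustration, not the decoder code itself): stacking one instance's (y, x, v, u) values column-wise and zero-padding to a per-image maximum of four points looks like this:

import numpy as np

# Instance with 2 sampled points (the third instance in the test below).
y, x = [0.5, 0.4], [0.5, 0.6]
v, u = [0.95, 0.94], [0.05, 0.06]
stacked = np.stack([y, x, v, u], axis=1)          # shape (2, 4)
padded = np.pad(stacked, ((0, 4 - stacked.shape[0]), (0, 0)))
# rows: [0.5 0.5 0.95 0.05], [0.4 0.6 0.94 0.06], then two all-zero rows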
def _expand_image_label_hierarchy(self, image_classes, image_confidences):
"""Expand image level labels according to the hierarchy.
...
@@ -1096,8 +1096,8 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn, [])
self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
tensor_dict)
def testDecodeImageLabels(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -1116,8 +1116,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_1, [])
self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 2]))
@@ -1152,8 +1151,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_2, [])
self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 3]))
@@ -1345,6 +1343,93 @@ class TfExampleDecoderTest(test_case.TestCase):
expected_image_confidence,
tensor_dict[fields.InputDataFields.groundtruth_image_confidences])
def testDecodeDensePose(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg, _ = self._create_encoded_and_decoded_data(
image_tensor, 'jpeg')
bbox_ymins = [0.0, 4.0, 2.0]
bbox_xmins = [1.0, 5.0, 8.0]
bbox_ymaxs = [2.0, 6.0, 1.0]
bbox_xmaxs = [3.0, 7.0, 3.3]
densepose_num = [0, 4, 2]
densepose_part_index = [2, 2, 3, 4, 2, 9]
densepose_x = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
densepose_y = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4]
densepose_u = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
densepose_v = [0.99, 0.98, 0.97, 0.96, 0.95, 0.94]
def graph_fn():
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature(six.b('jpeg')),
'image/object/bbox/ymin':
dataset_util.float_list_feature(bbox_ymins),
'image/object/bbox/xmin':
dataset_util.float_list_feature(bbox_xmins),
'image/object/bbox/ymax':
dataset_util.float_list_feature(bbox_ymaxs),
'image/object/bbox/xmax':
dataset_util.float_list_feature(bbox_xmaxs),
'image/object/densepose/num':
dataset_util.int64_list_feature(densepose_num),
'image/object/densepose/part_index':
dataset_util.int64_list_feature(densepose_part_index),
'image/object/densepose/x':
dataset_util.float_list_feature(densepose_x),
'image/object/densepose/y':
dataset_util.float_list_feature(densepose_y),
'image/object/densepose/u':
dataset_util.float_list_feature(densepose_u),
'image/object/densepose/v':
dataset_util.float_list_feature(densepose_v),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
load_dense_pose=True)
output = example_decoder.decode(tf.convert_to_tensor(example))
dp_num_points = output[fields.InputDataFields.groundtruth_dp_num_points]
dp_part_ids = output[fields.InputDataFields.groundtruth_dp_part_ids]
dp_surface_coords = output[
fields.InputDataFields.groundtruth_dp_surface_coords]
return dp_num_points, dp_part_ids, dp_surface_coords
dp_num_points, dp_part_ids, dp_surface_coords = self.execute_cpu(
graph_fn, [])
expected_dp_num_points = [0, 4, 2]
expected_dp_part_ids = [
[0, 0, 0, 0],
[2, 2, 3, 4],
[2, 9, 0, 0]
]
expected_dp_surface_coords = np.array(
[
# Instance 0 (no points).
[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
# Instance 1 (4 points).
[[0.9, 0.1, 0.99, 0.01],
[0.8, 0.2, 0.98, 0.02],
[0.7, 0.3, 0.97, 0.03],
[0.6, 0.4, 0.96, 0.04]],
# Instance 2 (2 points).
[[0.5, 0.5, 0.95, 0.05],
[0.4, 0.6, 0.94, 0.06],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
], dtype=np.float32)
self.assertAllEqual(dp_num_points, expected_dp_num_points)
self.assertAllEqual(dp_part_ids, expected_dp_part_ids)
self.assertAllClose(dp_surface_coords, expected_dp_surface_coords)
if __name__ == '__main__':
tf.test.main()
@@ -43,6 +43,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import copy
import datetime
import io
@@ -51,62 +52,11 @@ import json
import os
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import PIL.Image
import six
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('input_tfrecord', None, 'TFRecord containing images in '
'tf.Example format for object detection, with bounding'
'boxes and contextual feature embeddings.')
flags.DEFINE_string('output_tfrecord', None,
'TFRecord containing images in tf.Example format, with '
'added contextual memory banks.')
flags.DEFINE_string('sequence_key', None, 'Key to use when grouping sequences: '
'so far supports `image/seq_id` and `image/location`.')
flags.DEFINE_string('time_horizon', None, 'What time horizon to use when '
'splitting the data, if any. Options are: `year`, `month`,'
' `week`, `day `, `hour`, `minute`, `None`.')
flags.DEFINE_integer('subsample_context_features_rate', 0, 'Whether to '
'subsample the context_features, and if so how many to '
'sample. If the rate is set to X, it will sample context '
'from 1 out of every X images. Default is sampling from '
'every image, which is X=0.')
flags.DEFINE_boolean('reduce_image_size', True, 'downsamples images to'
'have longest side max_image_dimension, maintaining aspect'
' ratio')
flags.DEFINE_integer('max_image_dimension', 1024, 'sets max image dimension')
flags.DEFINE_boolean('add_context_features', True, 'adds a memory bank of'
'embeddings to each clip')
flags.DEFINE_boolean('sorted_image_ids', True, 'whether the image source_ids '
'are sortable to deal with date_captured tie-breaks')
flags.DEFINE_string('image_ids_to_keep', 'All', 'path to .json list of image'
'ids to keep, used for ground truth eval creation')
flags.DEFINE_boolean('keep_context_features_image_id_list', False, 'Whether or '
'not to keep a list of the image_ids corresponding to the '
'memory bank')
flags.DEFINE_boolean('keep_only_positives', False, 'Whether or not to '
'keep only positive boxes based on score')
flags.DEFINE_boolean('keep_only_positives_gt', False, 'Whether or not to '
'keep only positive boxes based on gt class')
flags.DEFINE_float('context_features_score_threshold', 0.7, 'What score '
'threshold to use for boxes in context_features')
flags.DEFINE_integer('max_num_elements_in_context_features', 2000, 'Sets max '
'num elements per memory bank')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
flags.DEFINE_string('output_type', 'tf_sequence_example', 'Output type, one of '
'`tf_example`, `tf_sequence_example`')
flags.DEFINE_integer('max_clip_length', None, 'Max length for sequence '
'example outputs.')
FLAGS = flags.FLAGS
DEFAULT_FEATURE_LENGTH = 2057
class ReKeyDataFn(beam.DoFn):
@@ -406,7 +356,8 @@ class GenerateContextFn(beam.DoFn):
keep_only_positives_gt=False,
max_num_elements_in_context_features=5000,
pad_context_features=False,
output_type='tf_example', max_clip_length=None,
context_feature_length=2057):
"""Initialization function. """Initialization function.
Args: Args:
...@@ -432,6 +383,8 @@ class GenerateContextFn(beam.DoFn): ...@@ -432,6 +383,8 @@ class GenerateContextFn(beam.DoFn):
output_type: What type of output, tf_example of tf_sequence_example output_type: What type of output, tf_example of tf_sequence_example
max_clip_length: The maximum length of a sequence example, before max_clip_length: The maximum length of a sequence example, before
splitting into multiple splitting into multiple
context_feature_length: The length of the context feature embeddings
stored in the input data.
""" """
self._session = None self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter( self._num_examples_processed = beam.metrics.Metrics.counter(
...@@ -456,6 +409,7 @@ class GenerateContextFn(beam.DoFn): ...@@ -456,6 +409,7 @@ class GenerateContextFn(beam.DoFn):
self._context_features_score_threshold = context_features_score_threshold self._context_features_score_threshold = context_features_score_threshold
self._max_num_elements_in_context_features = ( self._max_num_elements_in_context_features = (
max_num_elements_in_context_features) max_num_elements_in_context_features)
self._context_feature_length = context_feature_length
self._images_kept = beam.metrics.Metrics.counter(
'sequence_data_generation', 'images_kept')
@@ -506,9 +460,9 @@ class GenerateContextFn(beam.DoFn):
context_features_image_id_list.append(example_image_id)
if not example_embedding:
example_embedding.append(np.zeros(self._context_feature_length))
feature_length = self._context_feature_length
# If the example_list is not empty and image/embedding_length is in the
# feature dict, feature_length will be assigned to that. Otherwise, it will
@@ -703,7 +657,8 @@ class GenerateContextFn(beam.DoFn):
return list_of_examples
def construct_pipeline(pipeline,
input_tfrecord,
output_tfrecord,
sequence_key,
time_horizon=None,
@@ -720,10 +675,12 @@ def construct_pipeline(input_tfrecord,
max_num_elements_in_context_features=5000,
num_shards=0,
output_type='tf_example',
max_clip_length=None,
context_feature_length=2057):
"""Returns a beam pipeline to run object detection inference. """Returns a beam pipeline to run object detection inference.
Args: Args:
pipeline: Initialized beam pipeline.
input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
@@ -755,91 +712,224 @@ def construct_pipeline(input_tfrecord,
output_type: What type of output, tf_example or tf_sequence_example
max_clip_length: The maximum length of a sequence example, before
splitting into multiple
context_feature_length: The length of the context feature embeddings stored
in the input data.
""" """
if output_type == 'tf_example':
coder = beam.coders.ProtoCoder(tf.train.Example)
elif output_type == 'tf_sequence_example':
coder = beam.coders.ProtoCoder(tf.train.SequenceExample)
else:
raise ValueError('Unsupported output type.')
input_collection = (
pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
rekey_collection = input_collection | 'RekeyExamples' >> beam.ParDo(
ReKeyDataFn(sequence_key, time_horizon,
reduce_image_size, max_image_dimension))
grouped_collection = (
rekey_collection | 'GroupBySequenceKey' >> beam.GroupByKey())
grouped_collection = (
grouped_collection | 'ReshuffleGroups' >> beam.Reshuffle())
ordered_collection = (
grouped_collection | 'OrderByFrameNumber' >> beam.ParDo(
SortGroupedDataFn(sequence_key, sorted_image_ids,
max_num_elements_in_context_features)))
ordered_collection = (
ordered_collection | 'ReshuffleSortedGroups' >> beam.Reshuffle())
output_collection = (
ordered_collection | 'AddContextToExamples' >> beam.ParDo(
GenerateContextFn(
sequence_key, add_context_features, image_ids_to_keep,
keep_context_features_image_id_list=(
keep_context_features_image_id_list),
subsample_context_features_rate=subsample_context_features_rate,
keep_only_positives=keep_only_positives,
keep_only_positives_gt=keep_only_positives_gt,
context_features_score_threshold=(
context_features_score_threshold),
max_num_elements_in_context_features=(
max_num_elements_in_context_features),
output_type=output_type,
max_clip_length=max_clip_length,
context_feature_length=context_feature_length)))
output_collection = (
output_collection | 'ReshuffleExamples' >> beam.Reshuffle())
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=coder)
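A minimal usage sketch of the refactored entry point (hypothetical paths; under the new signature the caller owns the pipeline object, mirroring `main` below):

import apache_beam as beam

options = beam.options.pipeline_options.PipelineOptions(runner='DirectRunner')
p = beam.Pipeline(options=options)
construct_pipeline(
    p,
    input_tfrecord='/tmp/input.tfrecord',           # hypothetical path
    output_tfrecord='/tmp/with_context.tfrecord',   # hypothetical path
    sequence_key=b'image/seq_id')
p.run()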
def parse_args(argv):
"""Command-line argument parser.
Args:
argv: command line arguments
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--input_tfrecord',
dest='input_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format for object '
'detection, with bounding boxes and contextual feature embeddings.')
parser.add_argument(
'--output_tfrecord',
dest='output_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format, with added '
'contextual memory banks.')
parser.add_argument(
'--sequence_key',
dest='sequence_key',
default='image/location',
help='Key to use when grouping sequences: so far supports `image/seq_id` '
'and `image/location`.')
parser.add_argument(
'--context_feature_length',
dest='context_feature_length',
default=2057,
help='The length of the context feature embeddings stored in the input '
'data.')
parser.add_argument(
'--time_horizon',
dest='time_horizon',
default=None,
help='What time horizon to use when splitting the data, if any. Options '
'are: `year`, `month`, `week`, `day `, `hour`, `minute`, `None`.')
parser.add_argument(
'--subsample_context_features_rate',
dest='subsample_context_features_rate',
default=0,
help='Whether to subsample the context_features, and if so how many to '
'sample. If the rate is set to X, it will sample context from 1 out of '
'every X images. Default is sampling from every image, which is X=0.')
parser.add_argument(
'--reduce_image_size',
dest='reduce_image_size',
default=True,
help='downsamples images to have longest side max_image_dimension, '
'maintaining aspect ratio')
parser.add_argument(
'--max_image_dimension',
dest='max_image_dimension',
default=1024,
help='Sets max image dimension for resizing.')
parser.add_argument(
'--add_context_features',
dest='add_context_features',
default=True,
help='Adds a memory bank of embeddings to each clip')
parser.add_argument(
'--sorted_image_ids',
dest='sorted_image_ids',
default=True,
help='Whether the image source_ids are sortable to deal with '
'date_captured tie-breaks.')
parser.add_argument(
'--image_ids_to_keep',
dest='image_ids_to_keep',
default='All',
help='Path to .json list of image ids to keep, used for ground truth '
'eval creation.')
parser.add_argument(
'--keep_context_features_image_id_list',
dest='keep_context_features_image_id_list',
default=False,
help='Whether or not to keep a list of the image_ids corresponding to '
'the memory bank.')
parser.add_argument(
'--keep_only_positives',
dest='keep_only_positives',
default=False,
help='Whether or not to keep only positive boxes based on score.')
parser.add_argument(
'--context_features_score_threshold',
dest='context_features_score_threshold',
default=0.7,
help='What score threshold to use for boxes in context_features, when '
'`keep_only_positives` is set to `True`.')
parser.add_argument(
'--keep_only_positives_gt',
dest='keep_only_positives_gt',
default=False,
help='Whether or not to keep only positive boxes based on gt class.')
parser.add_argument(
'--max_num_elements_in_context_features',
dest='max_num_elements_in_context_features',
default=2000,
help='Sets max number of context feature elements per memory bank. '
'If the number of images in the context group is greater than '
'`max_num_elements_in_context_features`, the context group will be split.'
)
parser.add_argument(
'--output_type',
dest='output_type',
default='tf_example',
help='Output type, one of `tf_example`, `tf_sequence_example`.')
parser.add_argument(
'--max_clip_length',
dest='max_clip_length',
default=None,
help='Max length for sequence example outputs.')
parser.add_argument(
'--num_shards',
dest='num_shards',
default=0,
help='Number of output shards.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
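For example (hypothetical values), only the flags declared above are consumed, and anything unrecognized passes through to the Beam pipeline options:

beam_args, pipeline_args = parse_args([
    '--input_tfrecord=/tmp/in.tfrecord',    # hypothetical path
    '--output_tfrecord=/tmp/out.tfrecord',  # hypothetical path
    '--runner=DirectRunner',
])
# beam_args.input_tfrecord -> '/tmp/in.tfrecord'
# pipeline_args            -> ['--runner=DirectRunner']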
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference.
dirname = os.path.dirname(FLAGS.output_tfrecord) Args:
argv: Command line arguments.
save_main_session: Whether to save the main session.
"""
args, pipeline_args = parse_args(argv)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(args.output_tfrecord)
tf.io.gfile.makedirs(dirname) tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.input_tfrecord, p = beam.Pipeline(options=pipeline_options)
FLAGS.output_tfrecord,
FLAGS.sequence_key, construct_pipeline(
FLAGS.time_horizon, p,
FLAGS.subsample_context_features_rate, args.input_tfrecord,
FLAGS.reduce_image_size, args.output_tfrecord,
FLAGS.max_image_dimension, args.sequence_key,
FLAGS.add_context_features, args.time_horizon,
FLAGS.sorted_image_ids, args.subsample_context_features_rate,
FLAGS.image_ids_to_keep, args.reduce_image_size,
FLAGS.keep_context_features_image_id_list, args.max_image_dimension,
FLAGS.keep_only_positives, args.add_context_features,
FLAGS.context_features_score_threshold, args.sorted_image_ids,
FLAGS.keep_only_positives_gt, args.image_ids_to_keep,
FLAGS.max_num_elements_in_context_features, args.keep_context_features_image_id_list,
FLAGS.num_shards, args.keep_only_positives,
FLAGS.output_type, args.context_features_score_threshold,
FLAGS.max_clip_length)) args.keep_only_positives_gt,
args.max_num_elements_in_context_features,
args.output_type,
args.max_clip_length,
args.context_feature_length)
p.run()
if __name__ == '__main__':
flags.mark_flags_as_required([
'input_tfrecord',
'output_tfrecord'
])
app.run(main)
@@ -22,13 +22,13 @@ import datetime
import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import add_context_to_examples
from object_detection.utils import tf_version
from apache_beam import runners
@contextlib.contextmanager
@@ -200,7 +200,7 @@ class GenerateContextDataTest(tf.test.TestCase):
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
def assert_expected_key(self, key):
self.assertAllEqual(key, b'01')
def assert_sorted(self, example_collection):
example_list = list(example_collection)
@@ -329,19 +329,22 @@ class GenerateContextDataTest(tf.test.TestCase):
with InMemoryTFRecord(
[self._create_first_tf_example(),
self._create_second_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
sequence_key = six.ensure_binary('image/seq_id')
max_num_elements = 10
num_shards = 1
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
add_context_to_examples.construct_pipeline(
p,
input_tfrecord,
output_tfrecord,
sequence_key,
max_num_elements_in_context_features=max_num_elements,
num_shards=num_shards)
p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
@@ -355,20 +358,23 @@ class GenerateContextDataTest(tf.test.TestCase):
with InMemoryTFRecord(
[self._create_first_tf_example(),
self._create_second_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
sequence_key = six.ensure_binary('image/seq_id')
max_num_elements = 10
num_shards = 1
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
add_context_to_examples.construct_pipeline(
p,
input_tfrecord,
output_tfrecord,
sequence_key,
max_num_elements_in_context_features=max_num_elements,
num_shards=num_shards,
output_type='tf_sequence_example')
p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(
...
@@ -33,32 +33,19 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import hashlib
import io
import json
import logging
import os
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import PIL.Image
import tensorflow.compat.v1 as tf
from apache_beam import runners
from object_detection.utils import dataset_util
flags.DEFINE_string('image_directory', None, 'Directory where images are '
'stored')
flags.DEFINE_string('output_tfrecord_prefix', None,
'TFRecord containing images in tf.Example format.')
flags.DEFINE_string('input_annotations_file', None, 'Path to Coco-CameraTraps'
'style annotations file')
flags.DEFINE_integer('num_images_per_shard',
200,
'The number of images to be stored in each shard.')
FLAGS = flags.FLAGS
class ParseImage(beam.DoFn):
"""A DoFn that parses a COCO-CameraTraps json and emits TFRecords."""
@@ -243,13 +230,14 @@ class ParseImage(beam.DoFn):
return [(example)]
def load_json_data(data_file):
with tf.io.gfile.GFile(data_file, 'r') as fid:
data_dict = json.load(fid)
return data_dict
def create_pipeline(pipeline,
image_directory,
input_annotations_file,
output_tfrecord_prefix=None,
num_images_per_shard=200,
@@ -257,68 +245,97 @@ def create_pipeline(image_directory,
"""Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.
Args:
pipeline: Initialized beam pipeline.
image_directory: Path to image directory
input_annotations_file: Path to a coco-cameratraps annotation file
output_tfrecord_prefix: Absolute path for tfrecord outputs. Final files will
be named {output_tfrecord_prefix}@N.
num_images_per_shard: The number of images to store in each shard
keep_bboxes: Whether to keep any bounding boxes that exist in the json file
"""
logging.info('Reading data from COCO-CameraTraps Dataset.')
data = load_json_data(input_annotations_file)
num_shards = int(np.ceil(float(len(data['images']))/num_images_per_shard))
image_examples = (
pipeline | ('CreateCollections') >> beam.Create(
[im['id'] for im in data['images']])
| ('ParseImage') >> beam.ParDo(ParseImage(
image_directory, data['images'], data['annotations'],
data['categories'], keep_bboxes=keep_bboxes)))
_ = (image_examples
| ('Reshuffle') >> beam.Reshuffle()
| ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord_prefix,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example)))
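The shard count computed above is a plain ceiling division; with hypothetical numbers:

import numpy as np

num_images = 450              # hypothetical dataset size
num_images_per_shard = 200    # the default above
num_shards = int(np.ceil(float(num_images) / num_images_per_shard))
assert num_shards == 3        # three output shard files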
def parse_args(argv):
"""Command-line argument parser.
Args:
argv: command line arguments
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--image_directory',
dest='image_directory',
required=True,
help='Path to the directory where the images are stored.')
parser.add_argument(
'--output_tfrecord_prefix',
dest='output_tfrecord_prefix',
required=True,
help='Path and prefix to store TFRecords containing images in tf.Example '
'format.')
parser.add_argument(
'--input_annotations_file',
dest='input_annotations_file',
required=True,
help='Path to Coco-CameraTraps style annotations file.')
parser.add_argument(
'--num_images_per_shard',
dest='num_images_per_shard',
default=200,
help='The number of images to be stored in each output shard.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference. """Runs the Beam pipeline that performs inference.
Args: Args:
_: unused argv: Command line arguments.
save_main_session: Whether to save the main session.
""" """
args, pipeline_args = parse_args(argv)
# must create before flags are used pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner = runners.DirectRunner() pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(args.output_tfrecord_prefix)
tf.io.gfile.makedirs(dirname)
p = beam.Pipeline(options=pipeline_options)
create_pipeline(
pipeline=p,
image_directory=args.image_directory,
input_annotations_file=args.input_annotations_file,
output_tfrecord_prefix=args.output_tfrecord_prefix,
num_images_per_shard=args.num_images_per_shard)
p.run()
if __name__ == '__main__':
flags.mark_flags_as_required([
'image_directory',
'input_annotations_file',
'output_tfrecord_prefix'
])
app.run(main)
@@ -21,13 +21,14 @@ import json
import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
from PIL import Image
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
from object_detection.utils import tf_version
from apache_beam import runners
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
@@ -95,13 +96,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'animal'])
# Check other essential attributes.
self.assertAllEqual(
@@ -112,7 +113,7 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
@@ -134,13 +135,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'animal'])
# Check other essential attributes.
self.assertAllEqual(
@@ -151,21 +152,23 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_beam_pipeline(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
create_cococameratraps_tfexample_main.create_pipeline(
p, temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord)
p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
@@ -176,17 +179,19 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
actual_output[0]))
def test_beam_pipeline_bbox(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames, keep_bboxes=True)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
create_cococameratraps_tfexample_main.create_pipeline(
p, temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord,
keep_bboxes=True)
p.run()
filenames = tf.io.gfile.glob(output_tfrecord+'-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
@@ -45,26 +45,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import threading
from absl import app
from absl import flags
import apache_beam as beam
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('detection_input_tfrecord', None, 'TFRecord containing '
'images in tf.Example format for object detection.')
flags.DEFINE_string('detection_output_tfrecord', None,
'TFRecord containing detections in tf.Example format.')
flags.DEFINE_string('detection_model_dir', None, 'Path to directory containing'
'an object detection SavedModel.')
flags.DEFINE_float('confidence_threshold', 0.9,
'Min confidence to keep bounding boxes')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateDetectionDataFn(beam.DoFn):
@@ -205,58 +191,103 @@ class GenerateDetectionDataFn(beam.DoFn):
return [example]
def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
confidence_threshold, num_shards):
"""Builds a Beam pipeline to run object detection inference.
Args:
pipeline: Initialized beam pipeline.
input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
model_dir: Path to `saved_model` to use for inference.
confidence_threshold: Threshold to use when keeping detection results.
num_shards: The number of output shards.
"""
input_collection = (
pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'RunInference' >> beam.ParDo(
GenerateDetectionDataFn(model_dir, confidence_threshold))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
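A minimal end-to-end sketch (hypothetical paths and model directory) of driving this pipeline with the DirectRunner:

import apache_beam as beam

options = beam.options.pipeline_options.PipelineOptions(runner='DirectRunner')
p = beam.Pipeline(options=options)
construct_pipeline(
    p,
    input_tfrecord='/tmp/images.tfrecord',         # hypothetical path
    output_tfrecord='/tmp/detections.tfrecord',    # hypothetical path
    model_dir='/tmp/exported/saved_model',         # hypothetical path
    confidence_threshold=0.9,
    num_shards=1)
result = p.run()
result.wait_until_finish()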
def parse_args(argv):
"""Command-line argument parser.
Args:
argv: command line arguments
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--detection_input_tfrecord',
dest='detection_input_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format for object '
'detection.')
parser.add_argument(
'--detection_output_tfrecord',
dest='detection_output_tfrecord',
required=True,
help='TFRecord containing detections in tf.Example format.')
parser.add_argument(
'--detection_model_dir',
dest='detection_model_dir',
required=True,
help='Path to directory containing an object detection SavedModel.')
parser.add_argument(
'--confidence_threshold',
dest='confidence_threshold',
default=0.9,
help='Min confidence to keep bounding boxes.')
parser.add_argument(
'--num_shards',
dest='num_shards',
default=0,
help='Number of output shards.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference. """Runs the Beam pipeline that performs inference.
Args: Args:
_: unused argv: Command line arguments.
save_main_session: Whether to save the main session.
""" """
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.detection_output_tfrecord) args, pipeline_args = parse_args(argv)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
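# save_main_session pickles the state of __main__ so that module-level
# imports used inside DoFns are available on remote workers.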
dirname = os.path.dirname(args.detection_output_tfrecord)
tf.io.gfile.makedirs(dirname) tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.detection_input_tfrecord, p = beam.Pipeline(options=pipeline_options)
FLAGS.detection_output_tfrecord,
FLAGS.detection_model_dir, construct_pipeline(
FLAGS.confidence_threshold, p,
FLAGS.num_shards)) args.detection_input_tfrecord,
args.detection_output_tfrecord,
args.detection_model_dir,
args.confidence_threshold,
args.num_shards)
p.run()
if __name__ == '__main__': if __name__ == '__main__':
flags.mark_flags_as_required([
'detection_input_tfrecord',
'detection_output_tfrecord',
'detection_model_dir'
])
app.run(main) app.run(main)
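For reference, a minimal invocation sketch of this script (paths are placeholders; flags that parse_args does not consume, such as --runner, fall through to Beam's PipelineOptions):

python generate_detection_data.py \
  --detection_input_tfrecord=/path/to/images.tfrecord \
  --detection_output_tfrecord=/path/to/detections.tfrecord \
  --detection_model_dir=/path/to/saved_model \
  --confidence_threshold=0.9 \
  --num_shards=1 \
  --runner=DirectRunner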
...@@ -22,6 +22,7 @@ import contextlib ...@@ -22,6 +22,7 @@ import contextlib
import os import os
import tempfile import tempfile
import unittest import unittest
import apache_beam as beam
import numpy as np import numpy as np
import six import six
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
...@@ -32,7 +33,6 @@ from object_detection.core import model ...@@ -32,7 +33,6 @@ from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_detection_data from object_detection.dataset_tools.context_rcnn import generate_detection_data
from object_detection.protos import pipeline_pb2 from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2: if six.PY2:
import mock # pylint: disable=g-import-not-at-top import mock # pylint: disable=g-import-not-at-top
...@@ -67,6 +67,9 @@ class FakeModel(model.DetectionModel): ...@@ -67,6 +67,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type): def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes): def loss(self, prediction_dict, true_image_shapes):
pass pass
...@@ -243,16 +246,18 @@ class GenerateDetectionDataTest(tf.test.TestCase): ...@@ -243,16 +246,18 @@ class GenerateDetectionDataTest(tf.test.TestCase):
def test_beam_pipeline(self): def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord: with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model() saved_model_path = self._export_saved_model()
confidence_threshold = 0.8 confidence_threshold = 0.8
num_shards = 1 num_shards = 1
pipeline = generate_detection_data.construct_pipeline( pipeline_options = beam.options.pipeline_options.PipelineOptions(
input_tfrecord, output_tfrecord, saved_model_path, runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
generate_detection_data.construct_pipeline(
p, input_tfrecord, output_tfrecord, saved_model_path,
confidence_threshold, num_shards) confidence_threshold, num_shards)
runner.run(pipeline) p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????') filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = [] actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0]) record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
......
...@@ -47,34 +47,17 @@ from __future__ import absolute_import ...@@ -47,34 +47,17 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import argparse
import datetime import datetime
import os import os
import threading import threading
from absl import app from absl import app
from absl import flags
import apache_beam as beam import apache_beam as beam
import numpy as np import numpy as np
import six import six
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('embedding_input_tfrecord', None, 'TFRecord containing'
'images in tf.Example format for object detection.')
flags.DEFINE_string('embedding_output_tfrecord', None,
'TFRecord containing embeddings in tf.Example format.')
flags.DEFINE_string('embedding_model_dir', None, 'Path to directory containing'
'an object detection SavedModel with'
'detection_box_classifier_features in the output.')
flags.DEFINE_integer('top_k_embedding_count', 1,
'The number of top k embeddings to add to the memory bank.'
)
flags.DEFINE_integer('bottom_k_embedding_count', 0,
'The number of bottom k embeddings to add to the memory '
'bank.')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateEmbeddingDataFn(beam.DoFn): class GenerateEmbeddingDataFn(beam.DoFn):
...@@ -321,12 +304,13 @@ class GenerateEmbeddingDataFn(beam.DoFn): ...@@ -321,12 +304,13 @@ class GenerateEmbeddingDataFn(beam.DoFn):
return [example] return [example]
def construct_pipeline(input_tfrecord, output_tfrecord, model_dir, def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
top_k_embedding_count, bottom_k_embedding_count, top_k_embedding_count, bottom_k_embedding_count,
num_shards): num_shards):
"""Returns a beam pipeline to run object detection inference. """Returns a beam pipeline to run object detection inference.
Args: Args:
pipeline: Initialized beam pipeline.
input_tfrecord: A TFRecord of tf.train.Example protos containing images. input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model. in the input TFRecord and the detections from the model.
...@@ -335,44 +319,96 @@ def construct_pipeline(input_tfrecord, output_tfrecord, model_dir, ...@@ -335,44 +319,96 @@ def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
bottom_k_embedding_count: The number of low-confidence embeddings to store. bottom_k_embedding_count: The number of low-confidence embeddings to store.
num_shards: The number of output shards. num_shards: The number of output shards.
""" """
def pipeline(root): input_collection = (
input_collection = ( pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord( input_tfrecord,
input_tfrecord, coder=beam.coders.BytesCoder()))
coder=beam.coders.BytesCoder())) output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo( GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
GenerateEmbeddingDataFn(model_dir, top_k_embedding_count, bottom_k_embedding_count))
bottom_k_embedding_count)) output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle() _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord( output_tfrecord,
output_tfrecord, num_shards=num_shards,
num_shards=num_shards, coder=beam.coders.ProtoCoder(tf.train.Example))
coder=beam.coders.ProtoCoder(tf.train.Example))
return pipeline
def parse_args(argv):
"""Command-line argument parser.
def main(_):
Args:
argv: Command line arguments.
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--embedding_input_tfrecord',
dest='embedding_input_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format for object '
'detection.')
parser.add_argument(
'--embedding_output_tfrecord',
dest='embedding_output_tfrecord',
required=True,
help='TFRecord containing embeddings in tf.Example format.')
parser.add_argument(
'--embedding_model_dir',
dest='embedding_model_dir',
required=True,
help='Path to directory containing an object detection SavedModel with '
'detection_box_classifier_features in the output.')
parser.add_argument(
'--top_k_embedding_count',
dest='top_k_embedding_count',
type=int,
default=1,
help='The number of top k embeddings to add to the memory bank.')
parser.add_argument(
'--bottom_k_embedding_count',
dest='bottom_k_embedding_count',
type=int,
default=0,
help='The number of bottom k embeddings to add to the memory bank.')
parser.add_argument(
'--num_shards',
dest='num_shards',
type=int,
default=0,
help='Number of output shards.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference. """Runs the Beam pipeline that performs inference.
Args: Args:
_: unused argv: Command line arguments.
save_main_session: Whether to save the main session.
""" """
# must create before flags are used args, pipeline_args = parse_args(argv)
runner = runners.DirectRunner()
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(FLAGS.embedding_output_tfrecord) dirname = os.path.dirname(args.embedding_output_tfrecord)
tf.io.gfile.makedirs(dirname) tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.embedding_input_tfrecord,
FLAGS.embedding_output_tfrecord,
FLAGS.embedding_model_dir, FLAGS.top_k_embedding_count,
FLAGS.bottom_k_embedding_count, FLAGS.num_shards))
p = beam.Pipeline(options=pipeline_options)
construct_pipeline(
p,
args.embedding_input_tfrecord,
args.embedding_output_tfrecord,
args.embedding_model_dir,
args.top_k_embedding_count,
args.bottom_k_embedding_count,
args.num_shards)
p.run()
if __name__ == '__main__': if __name__ == '__main__':
flags.mark_flags_as_required([
'embedding_input_tfrecord',
'embedding_output_tfrecord',
'embedding_model_dir'
])
app.run(main) app.run(main)
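Likewise, a sketch for invoking this embedding script (paths are placeholders):

python generate_embedding_data.py \
  --embedding_input_tfrecord=/path/to/images.tfrecord \
  --embedding_output_tfrecord=/path/to/embeddings.tfrecord \
  --embedding_model_dir=/path/to/saved_model \
  --top_k_embedding_count=1 \
  --bottom_k_embedding_count=0 \
  --num_shards=1 \
  --runner=DirectRunner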
...@@ -21,6 +21,7 @@ import contextlib ...@@ -21,6 +21,7 @@ import contextlib
import os import os
import tempfile import tempfile
import unittest import unittest
import apache_beam as beam
import numpy as np import numpy as np
import six import six
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
...@@ -30,7 +31,7 @@ from object_detection.core import model ...@@ -30,7 +31,7 @@ from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_embedding_data from object_detection.dataset_tools.context_rcnn import generate_embedding_data
from object_detection.protos import pipeline_pb2 from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2: if six.PY2:
import mock # pylint: disable=g-import-not-at-top import mock # pylint: disable=g-import-not-at-top
...@@ -73,6 +74,9 @@ class FakeModel(model.DetectionModel): ...@@ -73,6 +74,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type): def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes): def loss(self, prediction_dict, true_image_shapes):
pass pass
...@@ -236,13 +240,13 @@ class GenerateEmbeddingData(tf.test.TestCase): ...@@ -236,13 +240,13 @@ class GenerateEmbeddingData(tf.test.TestCase):
.int64_list.value, [5]) .int64_list.value, [5])
self.assertAllEqual( self.assertAllEqual(
example.features.feature['image/object/class/text'] example.features.feature['image/object/class/text']
.bytes_list.value, ['hyena']) .bytes_list.value, [b'hyena'])
self.assertAllClose( self.assertAllClose(
example.features.feature['image/class/label'] example.features.feature['image/class/label']
.int64_list.value, [5]) .int64_list.value, [5])
self.assertAllEqual( self.assertAllEqual(
example.features.feature['image/class/text'] example.features.feature['image/class/text']
.bytes_list.value, ['hyena']) .bytes_list.value, [b'hyena'])
# Check other essential attributes. # Check other essential attributes.
self.assertAllEqual( self.assertAllEqual(
...@@ -251,7 +255,7 @@ class GenerateEmbeddingData(tf.test.TestCase): ...@@ -251,7 +255,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
example.features.feature['image/width'].int64_list.value, [600]) example.features.feature['image/width'].int64_list.value, [600])
self.assertAllEqual( self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value, example.features.feature['image/source_id'].bytes_list.value,
['image_id']) [b'image_id'])
self.assertTrue( self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value) example.features.feature['image/encoded'].bytes_list.value)
...@@ -268,7 +272,7 @@ class GenerateEmbeddingData(tf.test.TestCase): ...@@ -268,7 +272,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
.int64_list.value, [5]) .int64_list.value, [5])
self.assertAllEqual(tf.train.Example.FromString( self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/text'] generated_example).features.feature['image/object/class/text']
.bytes_list.value, ['hyena']) .bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example) output = inference_fn.process(generated_example)
output_example = output[0] output_example = output[0]
self.assert_expected_example(output_example) self.assert_expected_example(output_example)
...@@ -304,24 +308,26 @@ class GenerateEmbeddingData(tf.test.TestCase): ...@@ -304,24 +308,26 @@ class GenerateEmbeddingData(tf.test.TestCase):
.feature['image/object/class/label'].int64_list.value, [5]) .feature['image/object/class/label'].int64_list.value, [5])
self.assertAllEqual( self.assertAllEqual(
tf.train.Example.FromString(generated_example).features tf.train.Example.FromString(generated_example).features
.feature['image/object/class/text'].bytes_list.value, ['hyena']) .feature['image/object/class/text'].bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example) output = inference_fn.process(generated_example)
output_example = output[0] output_example = output[0]
self.assert_expected_example(output_example, botk=True) self.assert_expected_example(output_example, botk=True)
def test_beam_pipeline(self): def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord: with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model() saved_model_path = self._export_saved_model()
top_k_embedding_count = 1 top_k_embedding_count = 1
bottom_k_embedding_count = 0 bottom_k_embedding_count = 0
num_shards = 1 num_shards = 1
pipeline = generate_embedding_data.construct_pipeline( pipeline_options = beam.options.pipeline_options.PipelineOptions(
input_tfrecord, output_tfrecord, saved_model_path, runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
generate_embedding_data.construct_pipeline(
p, input_tfrecord, output_tfrecord, saved_model_path,
top_k_embedding_count, bottom_k_embedding_count, num_shards) top_k_embedding_count, bottom_k_embedding_count, num_shards)
runner.run(pipeline) p.run()
filenames = tf.io.gfile.glob( filenames = tf.io.gfile.glob(
output_tfrecord + '-?????-of-?????') output_tfrecord + '-?????-of-?????')
actual_output = [] actual_output = []
......
...@@ -14,6 +14,9 @@ ...@@ -14,6 +14,9 @@
# ============================================================================== # ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection. r"""Convert raw COCO dataset to TFRecord for object_detection.
This tool supports data generation for object detection (boxes, masks),
keypoint detection, and DensePose.
Please note that this tool creates sharded output files. Please note that this tool creates sharded output files.
Example usage: Example usage:
...@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '', ...@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
'Training annotations JSON file.') 'Training annotations JSON file.')
tf.flags.DEFINE_string('val_keypoint_annotations_file', '', tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
'Validation annotations JSON file.') 'Validation annotations JSON file.')
# DensePose annotations are only available for COCO 2014.
tf.flags.DEFINE_string('train_densepose_annotations_file', '',
'Training annotations JSON file for DensePose.')
tf.flags.DEFINE_string('val_densepose_annotations_file', '',
'Validation annotations JSON file for DensePose.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.') tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
# Whether to produce images/annotations only for the person class (for the
# keypoint / DensePose tasks).
tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
'remove all annotations for non-person objects.')
tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
'remove all examples that do not contain a person.')
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
...@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [ ...@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [
b'left_knee', b'right_knee', b'left_ankle', b'right_ankle' b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
] ]
_COCO_PART_NAMES = [
b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
b'left_face',
]
_DP_PART_ID_OFFSET = 1
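# dp_I part ids in the raw DensePose annotations are 1-indexed; subtracting
# _DP_PART_ID_OFFSET below makes them 0-indexed into _COCO_PART_NAMES.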
def clip_to_unit(x):
return min(max(x, 0.0), 1.0)
def create_tf_example(image, def create_tf_example(image,
annotations_list, annotations_list,
image_dir, image_dir,
category_index, category_index,
include_masks=False, include_masks=False,
keypoint_annotations_dict=None): keypoint_annotations_dict=None,
densepose_annotations_dict=None,
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Converts image and annotations to a tf.Example proto. """Converts image and annotations to a tf.Example proto.
Args: Args:
...@@ -108,10 +142,23 @@ def create_tf_example(image, ...@@ -108,10 +142,23 @@ def create_tf_example(image,
dictionary with keys: [u'keypoints', u'num_keypoints'] representing the dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
keypoint information for this person object annotation. If None, then keypoint information for this person object annotation. If None, then
no keypoint annotations will be populated. no keypoint annotations will be populated.
densepose_annotations_dict: A dictionary that maps from annotation_id to a
dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V']
representing part surface coordinates. For more information see
http://densepose.org/.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
Returns: Returns:
key: SHA256 hash of the image.
example: The converted tf.Example example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored. num_annotations_skipped: Number of (invalid) annotations that were ignored.
num_keypoint_annotation_skipped: Number of keypoint annotations that were
skipped.
num_densepose_annotation_skipped: Number of DensePose annotations that were
skipped.
Raises: Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG ValueError: if the image pointed to by data['filename'] is not a valid JPEG
...@@ -146,6 +193,16 @@ def create_tf_example(image, ...@@ -146,6 +193,16 @@ def create_tf_example(image,
num_annotations_skipped = 0 num_annotations_skipped = 0
num_keypoint_annotation_used = 0 num_keypoint_annotation_used = 0
num_keypoint_annotation_skipped = 0 num_keypoint_annotation_skipped = 0
dp_part_index = []
dp_x = []
dp_y = []
dp_u = []
dp_v = []
dp_num_points = []
densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
include_densepose = densepose_annotations_dict is not None
num_densepose_annotation_used = 0
num_densepose_annotation_skipped = 0
for object_annotations in annotations_list: for object_annotations in annotations_list:
(x, y, width, height) = tuple(object_annotations['bbox']) (x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0: if width <= 0 or height <= 0:
...@@ -154,14 +211,18 @@ def create_tf_example(image, ...@@ -154,14 +211,18 @@ def create_tf_example(image,
if x + width > image_width or y + height > image_height: if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1 num_annotations_skipped += 1
continue continue
category_id = int(object_annotations['category_id'])
category_name = category_index[category_id]['name'].encode('utf8')
if remove_non_person_annotations and category_name != b'person':
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width) xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width) xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height) ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height) ymax.append(float(y + height) / image_height)
is_crowd.append(object_annotations['iscrowd']) is_crowd.append(object_annotations['iscrowd'])
category_id = int(object_annotations['category_id'])
category_ids.append(category_id) category_ids.append(category_id)
category_names.append(category_index[category_id]['name'].encode('utf8')) category_names.append(category_name)
area.append(object_annotations['area']) area.append(object_annotations['area'])
if include_masks: if include_masks:
...@@ -197,6 +258,40 @@ def create_tf_example(image, ...@@ -197,6 +258,40 @@ def create_tf_example(image,
keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES)) keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
keypoints_name.extend(_COCO_KEYPOINT_NAMES) keypoints_name.extend(_COCO_KEYPOINT_NAMES)
num_keypoints.append(0) num_keypoints.append(0)
if include_densepose:
annotation_id = object_annotations['id']
if (annotation_id in densepose_annotations_dict and
all(key in densepose_annotations_dict[annotation_id]
for key in densepose_keys)):
dp_annotations = densepose_annotations_dict[annotation_id]
num_densepose_annotation_used += 1
dp_num_points.append(len(dp_annotations['dp_I']))
dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
for i in dp_annotations['dp_I']])
# DensePose surface coordinates are defined on a [256, 256] grid
# relative to each instance box (i.e. absolute coordinates in range
# [0., 256.]). The following converts the coordinates
# so that they are expressed in normalized image coordinates.
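# Worked example (illustrative numbers): for a bbox of
# [x, y, width, height] = [64, 64, 128, 128] in a 256x256 image,
# dp_x = 128. gives x_box_rel = 128. / 256. = 0.5 and
# x_norm = (64 + 0.5 * 128) / 256 = 0.5.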
dp_x_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
dp_x_norm = [(float(x) + x_box_rel * width) / image_width
for x_box_rel in dp_x_box_rel]
dp_y_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
dp_y_norm = [(float(y) + y_box_rel * height) / image_height
for y_box_rel in dp_y_box_rel]
dp_x.extend(dp_x_norm)
dp_y.extend(dp_y_norm)
dp_u.extend(dp_annotations['dp_U'])
dp_v.extend(dp_annotations['dp_V'])
else:
dp_num_points.append(0)
if (remove_non_person_images and
not any(name == b'person' for name in category_names)):
return (key, None, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
feature_dict = { feature_dict = {
'image/height': 'image/height':
dataset_util.int64_feature(image_height), dataset_util.int64_feature(image_height),
...@@ -243,15 +338,34 @@ def create_tf_example(image, ...@@ -243,15 +338,34 @@ def create_tf_example(image,
dataset_util.bytes_list_feature(keypoints_name)) dataset_util.bytes_list_feature(keypoints_name))
num_keypoint_annotation_skipped = ( num_keypoint_annotation_skipped = (
len(keypoint_annotations_dict) - num_keypoint_annotation_used) len(keypoint_annotations_dict) - num_keypoint_annotation_used)
if include_densepose:
feature_dict['image/object/densepose/num'] = (
dataset_util.int64_list_feature(dp_num_points))
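# The remaining DensePose features are flat lists over all instances in the
# image; dp_num_points above lets consumers split them back per instance.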
feature_dict['image/object/densepose/part_index'] = (
dataset_util.int64_list_feature(dp_part_index))
feature_dict['image/object/densepose/x'] = (
dataset_util.float_list_feature(dp_x))
feature_dict['image/object/densepose/y'] = (
dataset_util.float_list_feature(dp_y))
feature_dict['image/object/densepose/u'] = (
dataset_util.float_list_feature(dp_u))
feature_dict['image/object/densepose/v'] = (
dataset_util.float_list_feature(dp_v))
num_densepose_annotation_skipped = (
len(densepose_annotations_dict) - num_densepose_annotation_used)
example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return key, example, num_annotations_skipped, num_keypoint_annotation_skipped return (key, example, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
def _create_tf_record_from_coco_annotations(annotations_file, image_dir, def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
output_path, include_masks, output_path, include_masks,
num_shards, num_shards,
keypoint_annotations_file=''): keypoint_annotations_file='',
densepose_annotations_file='',
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Loads COCO annotation json files and converts to tf.Record format. """Loads COCO annotation json files and converts to tf.Record format.
Args: Args:
...@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir, ...@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_file: JSON file containing the person keypoint keypoint_annotations_file: JSON file containing the person keypoint
annotations. If empty, then no person keypoint annotations will be annotations. If empty, then no person keypoint annotations will be
generated. generated.
densepose_annotations_file: JSON file containing the DensePose annotations.
If empty, then no DensePose annotations will be generated.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
""" """
with contextlib2.ExitStack() as tf_record_close_stack, \ with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid: tf.gfile.GFile(annotations_file, 'r') as fid:
...@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir, ...@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
if image_id not in annotations_index: if image_id not in annotations_index:
missing_annotation_count += 1 missing_annotation_count += 1
annotations_index[image_id] = [] annotations_index[image_id] = []
logging.info('%d images are missing annotations.', missing_annotation_count) logging.info('%d images are missing annotations.',
missing_annotation_count)
keypoint_annotations_index = {} keypoint_annotations_index = {}
if keypoint_annotations_file: if keypoint_annotations_file:
...@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir, ...@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_index[image_id] = {} keypoint_annotations_index[image_id] = {}
keypoint_annotations_index[image_id][annotation['id']] = annotation keypoint_annotations_index[image_id][annotation['id']] = annotation
densepose_annotations_index = {}
if densepose_annotations_file:
with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
densepose_groundtruth_data = json.load(fid)
if 'annotations' in densepose_groundtruth_data:
for annotation in densepose_groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in densepose_annotations_index:
densepose_annotations_index[image_id] = {}
densepose_annotations_index[image_id][annotation['id']] = annotation
total_num_annotations_skipped = 0 total_num_annotations_skipped = 0
total_num_keypoint_annotations_skipped = 0 total_num_keypoint_annotations_skipped = 0
total_num_densepose_annotations_skipped = 0
for idx, image in enumerate(images): for idx, image in enumerate(images):
if idx % 100 == 0: if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(images)) logging.info('On image %d of %d', idx, len(images))
...@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir, ...@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_dict = {} keypoint_annotations_dict = {}
if image['id'] in keypoint_annotations_index: if image['id'] in keypoint_annotations_index:
keypoint_annotations_dict = keypoint_annotations_index[image['id']] keypoint_annotations_dict = keypoint_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped, densepose_annotations_dict = None
num_keypoint_annotations_skipped) = create_tf_example( if densepose_annotations_file:
densepose_annotations_dict = {}
if image['id'] in densepose_annotations_index:
densepose_annotations_dict = densepose_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped, num_keypoint_annotations_skipped,
num_densepose_annotations_skipped) = create_tf_example(
image, annotations_list, image_dir, category_index, include_masks, image, annotations_list, image_dir, category_index, include_masks,
keypoint_annotations_dict) keypoint_annotations_dict, densepose_annotations_dict,
remove_non_person_annotations, remove_non_person_images)
total_num_annotations_skipped += num_annotations_skipped total_num_annotations_skipped += num_annotations_skipped
total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
total_num_densepose_annotations_skipped += (
num_densepose_annotations_skipped)
shard_idx = idx % num_shards shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString()) if tf_example:
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
logging.info('Finished writing, skipped %d annotations.', logging.info('Finished writing, skipped %d annotations.',
total_num_annotations_skipped) total_num_annotations_skipped)
if keypoint_annotations_file: if keypoint_annotations_file:
logging.info('Finished writing, skipped %d keypoint annotations.', logging.info('Finished writing, skipped %d keypoint annotations.',
total_num_keypoint_annotations_skipped) total_num_keypoint_annotations_skipped)
if densepose_annotations_file:
logging.info('Finished writing, skipped %d DensePose annotations.',
total_num_densepose_annotations_skipped)
def main(_): def main(_):
...@@ -347,20 +492,26 @@ def main(_): ...@@ -347,20 +492,26 @@ def main(_):
train_output_path, train_output_path,
FLAGS.include_masks, FLAGS.include_masks,
num_shards=100, num_shards=100,
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file) keypoint_annotations_file=FLAGS.train_keypoint_annotations_file,
densepose_annotations_file=FLAGS.train_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations( _create_tf_record_from_coco_annotations(
FLAGS.val_annotations_file, FLAGS.val_annotations_file,
FLAGS.val_image_dir, FLAGS.val_image_dir,
val_output_path, val_output_path,
FLAGS.include_masks, FLAGS.include_masks,
num_shards=100, num_shards=50,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file) keypoint_annotations_file=FLAGS.val_keypoint_annotations_file,
densepose_annotations_file=FLAGS.val_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations( _create_tf_record_from_coco_annotations(
FLAGS.testdev_annotations_file, FLAGS.testdev_annotations_file,
FLAGS.test_image_dir, FLAGS.test_image_dir,
testdev_output_path, testdev_output_path,
FLAGS.include_masks, FLAGS.include_masks,
num_shards=100) num_shards=50)
if __name__ == '__main__': if __name__ == '__main__':
......
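As a quick cross-check of the DensePose coordinate conversion above, a self-contained sketch (the helper name and numbers are ours, not part of the tool):

def densepose_to_normalized(dp_vals, box_origin, box_size, image_size):
  """Maps DensePose points from the [0, 256] box grid to normalized coords."""
  normalized = []
  for v in dp_vals:
    box_rel = min(max(v / 256., 0.), 1.)  # same clipping as clip_to_unit
    normalized.append((box_origin + box_rel * box_size) / image_size)
  return normalized

# For a [64, 64, 128, 128] box in a 256x256 image, dp_x = 128. maps to 0.5,
# matching the expected_dp_x computation in the test below.
assert densepose_to_normalized([128.], 64, 128, 256) == [0.5]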
...@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase): ...@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
} }
(_, example, (_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example( num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index) image, annotations_list, image_dir, category_index)
self.assertEqual(num_annotations_skipped, 0) self.assertEqual(num_annotations_skipped, 0)
...@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase): ...@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
} }
(_, example, (_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example( num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index, include_masks=True) image, annotations_list, image_dir, category_index, include_masks=True)
self.assertEqual(num_annotations_skipped, 0) self.assertEqual(num_annotations_skipped, 0)
...@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase): ...@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
} }
} }
(_, example, _, _, example, _, num_keypoint_annotation_skipped, _ = (
num_keypoint_annotation_skipped) = create_coco_tf_record.create_tf_example( create_coco_tf_record.create_tf_example(
image, image,
annotations_list, annotations_list,
image_dir, image_dir,
category_index, category_index,
include_masks=False, include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict) keypoint_annotations_dict=keypoint_annotations_dict))
self.assertEqual(num_keypoint_annotation_skipped, 0) self.assertEqual(num_keypoint_annotation_skipped, 0)
self._assertProtoEqual( self._assertProtoEqual(
...@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase): ...@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
example.features.feature[ example.features.feature[
'image/object/keypoint/visibility'].int64_list.value, vv) 'image/object/keypoint/visibility'].int64_list.value, vv)
def test_create_tf_example_with_dense_pose(self):
image_dir = self.get_temp_dir()
image_file_name = 'tmp_image.jpg'
image_data = np.random.randint(low=0, high=256, size=(256, 256, 3)).astype(
np.uint8)
save_path = os.path.join(image_dir, image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
image = {
'file_name': image_file_name,
'height': 256,
'width': 256,
'id': 11,
}
min_x, min_y = 64, 64
max_x, max_y = 128, 128
keypoints = []
num_visible_keypoints = 0
xv = []
yv = []
vv = []
for _ in range(17):
xc = min_x + int(np.random.rand()*(max_x - min_x))
yc = min_y + int(np.random.rand()*(max_y - min_y))
vis = np.random.randint(0, 3)
xv.append(xc)
yv.append(yc)
vv.append(vis)
keypoints.extend([xc, yc, vis])
num_visible_keypoints += (vis > 0)
annotations_list = [{
'area': 0.5,
'iscrowd': False,
'image_id': 11,
'bbox': [64, 64, 128, 128],
'category_id': 1,
'id': 1000
}]
num_points = 45
dp_i = np.random.randint(1, 25, (num_points,)).astype(np.float32)
dp_u = np.random.randn(num_points)
dp_v = np.random.randn(num_points)
dp_x = np.random.rand(num_points)*256.
dp_y = np.random.rand(num_points)*256.
densepose_annotations_dict = {
1000: {
'dp_I': dp_i,
'dp_U': dp_u,
'dp_V': dp_v,
'dp_x': dp_x,
'dp_y': dp_y,
'bbox': [64, 64, 128, 128],
}
}
category_index = {
1: {
'name': 'person',
'id': 1
}
}
_, example, _, _, num_densepose_annotation_skipped = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
densepose_annotations_dict=densepose_annotations_dict))
self.assertEqual(num_densepose_annotation_skipped, 0)
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[six.b(image_file_name)])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[six.b(str(image['id']))])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value,
[six.b('jpeg')])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
[six.b('person')])
self._assertProtoEqual(
example.features.feature['image/object/densepose/num'].int64_list.value,
[num_points])
self.assertAllEqual(
example.features.feature[
'image/object/densepose/part_index'].int64_list.value,
dp_i.astype(np.int64) - create_coco_tf_record._DP_PART_ID_OFFSET)
self.assertAllClose(
example.features.feature['image/object/densepose/u'].float_list.value,
dp_u)
self.assertAllClose(
example.features.feature['image/object/densepose/v'].float_list.value,
dp_v)
expected_dp_x = (64 + dp_x * 128. / 256.) / 256.
expected_dp_y = (64 + dp_y * 128. / 256.) / 256.
self.assertAllClose(
example.features.feature['image/object/densepose/x'].float_list.value,
expected_dp_x)
self.assertAllClose(
example.features.feature['image/object/densepose/y'].float_list.value,
expected_dp_y)
def test_create_sharded_tf_record(self): def test_create_sharded_tf_record(self):
tmp_dir = self.get_temp_dir() tmp_dir = self.get_temp_dir()
image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg'] image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
......
...@@ -288,7 +288,7 @@ class SeqExampleUtilTest(tf.test.TestCase): ...@@ -288,7 +288,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75, 1.], [0.75, 1.],
seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:]) seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:])
self.assertAllEqual( self.assertAllEqual(
['cat', 'frog'], [b'cat', b'frog'],
seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:]) seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:])
self.assertAllClose( self.assertAllClose(
[0.], [0.],
...@@ -332,7 +332,7 @@ class SeqExampleUtilTest(tf.test.TestCase): ...@@ -332,7 +332,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75], [0.75],
seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:]) seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:])
self.assertAllEqual( self.assertAllEqual(
['cat'], [b'cat'],
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:]) seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
self.assertAllClose( self.assertAllClose(
[], [],
......
...@@ -42,7 +42,7 @@ class OpenOutputTfrecordsTests(tf.test.TestCase): ...@@ -42,7 +42,7 @@ class OpenOutputTfrecordsTests(tf.test.TestCase):
tf_record_path = '{}-{:05d}-of-00010'.format( tf_record_path = '{}-{:05d}-of-00010'.format(
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx) os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
records = list(tf.python_io.tf_record_iterator(tf_record_path)) records = list(tf.python_io.tf_record_iterator(tf_record_path))
self.assertAllEqual(records, ['test_{}'.format(idx)]) self.assertAllEqual(records, ['test_{}'.format(idx).encode('utf-8')])
if __name__ == '__main__': if __name__ == '__main__':
......