"test/vscode:/vscode.git/clone" did not exist on "34c661e71cc8cf4753843a58786c8f6211ec5e22"
Commit 31ca3b97 authored by Kaushik Shivakumar

resolve merge conflicts

parents 3e9d886d 7fcd7cba
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.densepose_ops."""
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.core import densepose_ops
from object_detection.utils import test_case
class DensePoseOpsTest(test_case.TestCase):
"""Tests for common DensePose operations."""
def test_scale(self):
def graph_fn():
dp_surface_coords = tf.constant([
[[0.0, 0.0, 0.1, 0.2], [100.0, 200.0, 0.3, 0.4]],
[[50.0, 120.0, 0.5, 0.6], [100.0, 140.0, 0.7, 0.8]]
])
y_scale = tf.constant(1.0 / 100)
x_scale = tf.constant(1.0 / 200)
output = densepose_ops.scale(dp_surface_coords, y_scale, x_scale)
return output
output = self.execute(graph_fn, [])
expected_dp_surface_coords = np.array([
[[0., 0., 0.1, 0.2], [1.0, 1.0, 0.3, 0.4]],
[[0.5, 0.6, 0.5, 0.6], [1.0, 0.7, 0.7, 0.8]]
])
self.assertAllClose(output, expected_dp_surface_coords)
def test_clip_to_window(self):
def graph_fn():
dp_surface_coords = tf.constant([
[[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
[[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
])
window = tf.constant([0.25, 0.25, 0.75, 0.75])
output = densepose_ops.clip_to_window(dp_surface_coords, window)
return output
output = self.execute(graph_fn, [])
expected_dp_surface_coords = np.array([
[[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
[[0.5, 0.25, 0.5, 0.6], [0.75, 0.75, 0.7, 0.8]]
])
self.assertAllClose(output, expected_dp_surface_coords)
def test_prune_outside_window(self):
def graph_fn():
dp_num_points = tf.constant([2, 0, 1])
dp_part_ids = tf.constant([[1, 1], [0, 0], [16, 0]])
dp_surface_coords = tf.constant([
[[0.9, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
[[0.8, 0.5, 0.6, 0.6], [0.5, 0.5, 0.7, 0.7]]
])
window = tf.constant([0.25, 0.25, 0.75, 0.75])
new_dp_num_points, new_dp_part_ids, new_dp_surface_coords = (
densepose_ops.prune_outside_window(dp_num_points, dp_part_ids,
dp_surface_coords, window))
return new_dp_num_points, new_dp_part_ids, new_dp_surface_coords
new_dp_num_points, new_dp_part_ids, new_dp_surface_coords = (
self.execute_cpu(graph_fn, []))
expected_dp_num_points = np.array([1, 0, 0])
expected_dp_part_ids = np.array([[1], [0], [0]])
expected_dp_surface_coords = np.array([
[[0.75, 0.75, 0.3, 0.4]],
[[0.0, 0.0, 0.0, 0.0]],
[[0.0, 0.0, 0.0, 0.0]]
])
self.assertAllEqual(new_dp_num_points, expected_dp_num_points)
self.assertAllEqual(new_dp_part_ids, expected_dp_part_ids)
self.assertAllClose(new_dp_surface_coords, expected_dp_surface_coords)
def test_change_coordinate_frame(self):
def graph_fn():
dp_surface_coords = tf.constant([
[[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
[[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
])
window = tf.constant([0.25, 0.25, 0.75, 0.75])
output = densepose_ops.change_coordinate_frame(dp_surface_coords, window)
return output
output = self.execute(graph_fn, [])
expected_dp_surface_coords = np.array([
[[0, 0.5, 0.1, 0.2], [1.0, 1.0, 0.3, 0.4]],
[[0.5, -0.5, 0.5, 0.6], [1.5, 1.5, 0.7, 0.8]]
])
self.assertAllClose(output, expected_dp_surface_coords)
def test_to_normalized_coordinates(self):
def graph_fn():
dp_surface_coords = tf.constant([
[[10., 30., 0.1, 0.2], [30., 45., 0.3, 0.4]],
[[20., 0., 0.5, 0.6], [40., 60., 0.7, 0.8]]
])
output = densepose_ops.to_normalized_coordinates(
dp_surface_coords, 40, 60)
return output
output = self.execute(graph_fn, [])
expected_dp_surface_coords = np.array([
[[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
[[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
])
self.assertAllClose(output, expected_dp_surface_coords)
def test_to_absolute_coordinates(self):
def graph_fn():
dp_surface_coords = tf.constant([
[[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
[[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
])
output = densepose_ops.to_absolute_coordinates(
dp_surface_coords, 40, 60)
return output
output = self.execute(graph_fn, [])
expected_dp_surface_coords = np.array([
[[10., 30., 0.1, 0.2], [30., 45., 0.3, 0.4]],
[[20., 0., 0.5, 0.6], [40., 60., 0.7, 0.8]]
])
self.assertAllClose(output, expected_dp_surface_coords)
def test_horizontal_flip(self):
part_ids_np = np.array([[1, 4], [0, 8]], dtype=np.int32)
surf_coords_np = np.array([
[[0.1, 0.7, 0.2, 0.4], [0.3, 0.8, 0.2, 0.4]],
[[0.0, 0.5, 0.8, 0.7], [0.6, 1.0, 0.7, 0.9]],
], dtype=np.float32)
def graph_fn():
part_ids = tf.constant(part_ids_np, dtype=tf.int32)
surf_coords = tf.constant(surf_coords_np, dtype=tf.float32)
flipped_part_ids, flipped_surf_coords = densepose_ops.flip_horizontal(
part_ids, surf_coords)
flipped_twice_part_ids, flipped_twice_surf_coords = (
densepose_ops.flip_horizontal(flipped_part_ids, flipped_surf_coords))
return (flipped_part_ids, flipped_surf_coords,
flipped_twice_part_ids, flipped_twice_surf_coords)
(flipped_part_ids, flipped_surf_coords, flipped_twice_part_ids,
flipped_twice_surf_coords) = self.execute(graph_fn, [])
expected_flipped_part_ids = [[1, 5], # 1->1, 4->5
[0, 9]] # 0->0, 8->9
expected_flipped_surf_coords_yx = np.array([
[[0.1, 1.0-0.7], [0.3, 1.0-0.8]],
[[0.0, 1.0-0.5], [0.6, 1.0-1.0]],
], dtype=np.float32)
self.assertAllEqual(expected_flipped_part_ids, flipped_part_ids)
self.assertAllClose(expected_flipped_surf_coords_yx,
flipped_surf_coords[:, :, 0:2])
self.assertAllEqual(part_ids_np, flipped_twice_part_ids)
self.assertAllClose(surf_coords_np, flipped_twice_surf_coords, rtol=1e-2,
atol=1e-2)
if __name__ == '__main__':
tf.test.main()
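For readers skimming the assertions above: the flip semantics they pin down are that the normalized x-coordinate maps to 1 - x and that left/right part ids swap under a fixed permutation (the test only fixes the pairs 1->1, 4->5, 0->0, 8->9). A minimal NumPy sketch of just that behavior follows; the full permutation table is hypothetical, and the (v, u) surface-coordinate remapping that the real densepose_ops.flip_horizontal also performs is omitted.

import numpy as np

# Hypothetical left/right swap table over the 24 DensePose part ids; only the
# pairs (4, 5) and (8, 9) are implied by the test above.
_PART_FLIP = np.arange(24)
for _a, _b in ((4, 5), (8, 9)):
  _PART_FLIP[_a], _PART_FLIP[_b] = _b, _a

def flip_horizontal_sketch(part_ids, surface_coords):
  """part_ids: [N, P] int32; surface_coords: [N, P, 4] float32 (y, x, v, u)."""
  flipped_coords = surface_coords.copy()
  flipped_coords[..., 1] = 1.0 - flipped_coords[..., 1]  # x -> 1 - x
  # Note: the real op also remaps (v, u) into the mirrored part's UV space.
  return _PART_FLIP[part_ids], flipped_coords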
@@ -102,7 +102,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
    Args:
      field: a string key, options are
        fields.BoxListFields.{boxes,classes,masks,keypoints,
        keypoint_visibilities, densepose_*} or
        fields.InputDataFields.is_annotated.

    Returns:
@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
    Args:
      field: a string key, options are
        fields.BoxListFields.{boxes,classes,masks,keypoints,
        keypoint_visibilities, densepose_*} or
        fields.InputDataFields.is_annotated.

    Returns:
@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
        detection_classes: [batch, max_detections]
          (If a model is producing class-agnostic detections, this field may be
          missing)
        detection_masks: [batch, max_detections, mask_height, mask_width]
          (optional)
        detection_keypoints: [batch, max_detections, num_keypoints, 2]
          (optional)
        detection_keypoint_scores: [batch, max_detections, num_keypoints]
          (optional)
        detection_surface_coords: [batch, max_detections, mask_height,
          mask_width, 2] (optional)
        num_detections: [batch]

      In addition to the above fields this stage also outputs the following
@@ -288,19 +293,23 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
    """
    pass
  def provide_groundtruth(
      self,
      groundtruth_boxes_list,
      groundtruth_classes_list,
      groundtruth_masks_list=None,
      groundtruth_keypoints_list=None,
      groundtruth_keypoint_visibilities_list=None,
      groundtruth_dp_num_points_list=None,
      groundtruth_dp_part_ids_list=None,
      groundtruth_dp_surface_coords_list=None,
      groundtruth_weights_list=None,
      groundtruth_confidences_list=None,
      groundtruth_is_crowd_list=None,
      groundtruth_group_of_list=None,
      groundtruth_area_list=None,
      is_annotated_list=None,
      groundtruth_labeled_classes=None):
    """Provide groundtruth tensors.

    Args:
@@ -324,6 +333,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
        `groundtruth_keypoint_visibilities_list`).
      groundtruth_keypoint_visibilities_list: a list of 2-D tf.bool tensors
        of shape [num_boxes, num_keypoints] containing keypoint visibilities.
      groundtruth_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
        [num_boxes] containing the number of DensePose sampled points.
      groundtruth_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
        [num_boxes, max_sampled_points] containing the DensePose part ids
        (0-indexed) for each sampled point. Note that there may be padding.
      groundtruth_dp_surface_coords_list: a list of 3-D tf.float32 tensors of
        shape [num_boxes, max_sampled_points, 4] containing the DensePose
        surface coordinates for each sampled point. Note that there may be
        padding.
      groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
        [num_boxes] containing weights for groundtruth boxes.
      groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape
@@ -361,6 +379,18 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
      self._groundtruth_lists[
          fields.BoxListFields.keypoint_visibilities] = (
              groundtruth_keypoint_visibilities_list)
    if groundtruth_dp_num_points_list:
      self._groundtruth_lists[
          fields.BoxListFields.densepose_num_points] = (
              groundtruth_dp_num_points_list)
    if groundtruth_dp_part_ids_list:
      self._groundtruth_lists[
          fields.BoxListFields.densepose_part_ids] = (
              groundtruth_dp_part_ids_list)
    if groundtruth_dp_surface_coords_list:
      self._groundtruth_lists[
          fields.BoxListFields.densepose_surface_coords] = (
              groundtruth_dp_surface_coords_list)
    if groundtruth_is_crowd_list:
      self._groundtruth_lists[
          fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
@@ -391,7 +421,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
    pass

  @abc.abstractmethod
  def restore_map(self,
                  fine_tune_checkpoint_type='detection',
                  load_all_detection_checkpoint_vars=False):
    """Returns a map of variables to load from a foreign checkpoint.

    Returns a map of variable names to load from a checkpoint to variables in
@@ -407,6 +439,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
        checkpoint (with compatible variable names) or to restore from a
        classification checkpoint for initialization prior to training.
        Valid values: `detection`, `classification`. Default 'detection'.
      load_all_detection_checkpoint_vars: whether to load all variables (when
        `fine_tune_checkpoint_type` is `detection`). If False, only variables
        within the feature extractor scope are included. Default False.

    Returns:
      A dict mapping variable names (to load from a checkpoint) to variables in
@@ -414,6 +449,36 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
    """
    pass
  @abc.abstractmethod
  def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
    """Returns a map of variables to load from a foreign checkpoint.

    Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
    or Checkpoint). This enables the model to initialize based on weights from
    another task. For example, the feature extractor variables from a
    classification model can be used to bootstrap training of an object
    detector. When loading from an object detection model, the checkpoint model
    should have the same parameters as this detection model with exception of
    the num_classes parameter.

    Note that this function is intended to be used to restore Keras-based
    models when running Tensorflow 2, whereas restore_map (above) is intended
    to be used to restore Slim-based models when running Tensorflow 1.x.

    TODO(jonathanhuang,rathodv): Check tf_version and raise unimplemented
    error for both restore_map and restore_from_objects depending on version.

    Args:
      fine_tune_checkpoint_type: whether to restore from a full detection
        checkpoint (with compatible variable names) or to restore from a
        classification checkpoint for initialization prior to training.
        Valid values: `detection`, `classification`. Default 'detection'.

    Returns:
      A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
    """
    pass
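In TF2 the returned dict is designed to feed tf.train.Checkpoint directly. A minimal usage sketch, assuming `detection_model` is a concrete implementation and `ckpt_path` points at a compatible classification checkpoint:

restore_dict = detection_model.restore_from_objects(
    fine_tune_checkpoint_type='classification')
ckpt = tf.train.Checkpoint(**restore_dict)
# expect_partial() suppresses warnings about checkpoint variables that this
# detection model intentionally does not consume.
ckpt.restore(ckpt_path).expect_partial()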
  @abc.abstractmethod
  def updates(self):
    """Returns a list of update operators for this model.
...
@@ -57,6 +57,9 @@ class FakeModel(model.DetectionModel):
  def restore_map(self):
    return {}

  def restore_from_objects(self, fine_tune_checkpoint_type):
    pass

  def regularization_losses(self):
    return []
...
@@ -79,6 +79,7 @@ import tensorflow.compat.v1 as tf
from tensorflow.python.ops import control_flow_ops

from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import densepose_ops
from object_detection.core import keypoint_ops
from object_detection.core import preprocessor_cache
from object_detection.core import standard_fields as fields
@@ -568,6 +569,8 @@ def random_horizontal_flip(image,
                           masks=None,
                           keypoints=None,
                           keypoint_visibilities=None,
                           densepose_part_ids=None,
                           densepose_surface_coords=None,
                           keypoint_flip_permutation=None,
                           probability=0.5,
                           seed=None,
@@ -589,6 +592,16 @@ def random_horizontal_flip(image,
      normalized coordinates.
    keypoint_visibilities: (optional) rank 2 bool tensor with shape
      [num_instances, num_keypoints].
    densepose_part_ids: (optional) rank 2 int32 tensor with shape
      [num_instances, num_points] holding the part id for each
      sampled point. These part_ids are 0-indexed, where the
      first non-background part has index 0.
    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
      [num_instances, num_points, 4]. The DensePose
      coordinates are of the form (y, x, v, u) where
      (y, x) are the normalized image coordinates for a
      sampled point, and (v, u) is the surface
      coordinate for the part.
    keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
      permutation.
    probability: the probability of performing this augmentation.
@@ -601,9 +614,9 @@ def random_horizontal_flip(image,
  Returns:
    image: image which is the same shape as input image.

    If boxes, masks, keypoints, keypoint_visibilities,
    keypoint_flip_permutation, densepose_part_ids, or densepose_surface_coords
    are not None, the function also returns the following tensors.

    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
      Boxes are in normalized form meaning their coordinates vary
@@ -614,9 +627,15 @@ def random_horizontal_flip(image,
      [num_instances, num_keypoints, 2]
    keypoint_visibilities: rank 2 bool tensor with shape
      [num_instances, num_keypoints].
    densepose_part_ids: rank 2 int32 tensor with shape
      [num_instances, num_points].
    densepose_surface_coords: rank 3 float32 tensor with shape
      [num_instances, num_points, 4].

  Raises:
    ValueError: if keypoints are provided but keypoint_flip_permutation is not.
    ValueError: if only one of densepose_part_ids and densepose_surface_coords
      is provided (both are required together).
  """

  def _flip_image(image):
@@ -628,6 +647,11 @@ def random_horizontal_flip(image,
    raise ValueError(
        'keypoints are provided but keypoints_flip_permutation is not provided')
  if ((densepose_part_ids is not None and densepose_surface_coords is None) or
      (densepose_part_ids is None and densepose_surface_coords is not None)):
    raise ValueError(
        'Must provide both `densepose_part_ids` and `densepose_surface_coords`')

  with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
    result = []
    # random variable defining whether to do flip or not
@@ -666,7 +690,6 @@ def random_horizontal_flip(image,
    # flip keypoint visibilities
    if (keypoint_visibilities is not None and
        keypoint_flip_permutation is not None):
      kpt_flip_perm = keypoint_flip_permutation
      keypoint_visibilities = tf.cond(
          do_a_flip_random,
@@ -674,6 +697,17 @@ def random_horizontal_flip(image,
          lambda: keypoint_visibilities)
      result.append(keypoint_visibilities)

    # flip DensePose parts and coordinates
    if densepose_part_ids is not None:
      flip_densepose_fn = functools.partial(
          densepose_ops.flip_horizontal, densepose_part_ids,
          densepose_surface_coords)
      densepose_tensors = tf.cond(
          do_a_flip_random,
          flip_densepose_fn,
          lambda: (densepose_part_ids, densepose_surface_coords))
      result.extend(densepose_tensors)

    return tuple(result)
@@ -1285,6 +1319,9 @@ def _strict_random_crop_image(image,
                              masks=None,
                              keypoints=None,
                              keypoint_visibilities=None,
                              densepose_num_points=None,
                              densepose_part_ids=None,
                              densepose_surface_coords=None,
                              min_object_covered=1.0,
                              aspect_ratio_range=(0.75, 1.33),
                              area_range=(0.1, 1.0),
@@ -1322,6 +1359,19 @@ def _strict_random_crop_image(image,
      normalized coordinates.
    keypoint_visibilities: (optional) rank 2 bool tensor with shape
      [num_instances, num_keypoints].
    densepose_num_points: (optional) rank 1 int32 tensor with shape
      [num_instances] with the number of sampled points per
      instance.
    densepose_part_ids: (optional) rank 2 int32 tensor with shape
      [num_instances, num_points] holding the part id for each
      sampled point. These part_ids are 0-indexed, where the
      first non-background part has index 0.
    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
      [num_instances, num_points, 4]. The DensePose
      coordinates are of the form (y, x, v, u) where
      (y, x) are the normalized image coordinates for a
      sampled point, and (v, u) is the surface
      coordinate for the part.
    min_object_covered: the cropped image must cover at least this fraction of
      at least one of the input bounding boxes.
    aspect_ratio_range: allowed range for aspect ratio of cropped image.
@@ -1341,8 +1391,9 @@ def _strict_random_crop_image(image,
      Boxes are in normalized form.
    labels: new labels.

    If label_weights, multiclass_scores, masks, keypoints,
    keypoint_visibilities, densepose_num_points, densepose_part_ids, or
    densepose_surface_coords is not None, the function also returns:
    label_weights: rank 1 float32 tensor with shape [num_instances].
    multiclass_scores: rank 2 float32 tensor with shape
      [num_instances, num_classes]
@@ -1351,9 +1402,24 @@ def _strict_random_crop_image(image,
    keypoints: rank 3 float32 tensor with shape
      [num_instances, num_keypoints, 2]
    keypoint_visibilities: rank 2 bool tensor with shape
      [num_instances, num_keypoints]
    densepose_num_points: rank 1 int32 tensor with shape [num_instances].
    densepose_part_ids: rank 2 int32 tensor with shape
      [num_instances, num_points].
    densepose_surface_coords: rank 3 float32 tensor with shape
      [num_instances, num_points, 4].

  Raises:
    ValueError: If some but not all of the DensePose tensors are provided.
  """
  with tf.name_scope('RandomCropImage', values=[image, boxes]):
    densepose_tensors = [densepose_num_points, densepose_part_ids,
                         densepose_surface_coords]
    if (any(t is not None for t in densepose_tensors) and
        not all(t is not None for t in densepose_tensors)):
      raise ValueError('If cropping DensePose labels, must provide '
                       '`densepose_num_points`, `densepose_part_ids`, and '
                       '`densepose_surface_coords`')
    image_shape = tf.shape(image)

    # boxes are [N, 4]. Lets first make them [N, 1, 4].
@@ -1464,6 +1530,23 @@ def _strict_random_crop_image(image,
          new_keypoints, kpt_vis_of_boxes_completely_inside_window)
      result.append(new_kpt_visibilities)

    if densepose_num_points is not None:
      filtered_dp_tensors = []
      for dp_tensor in densepose_tensors:
        dp_tensor_inside_window = tf.gather(dp_tensor, inside_window_ids)
        dp_tensor_completely_inside_window = tf.gather(dp_tensor_inside_window,
                                                       keep_ids)
        filtered_dp_tensors.append(dp_tensor_completely_inside_window)
      new_dp_num_points = filtered_dp_tensors[0]
      new_dp_point_ids = filtered_dp_tensors[1]
      new_dp_surf_coords = densepose_ops.change_coordinate_frame(
          filtered_dp_tensors[2], im_box_rank1)
      if clip_boxes:
        new_dp_num_points, new_dp_point_ids, new_dp_surf_coords = (
            densepose_ops.prune_outside_window(
                new_dp_num_points, new_dp_point_ids, new_dp_surf_coords,
                window=[0.0, 0.0, 1.0, 1.0]))
      result.extend([new_dp_num_points, new_dp_point_ids, new_dp_surf_coords])

    return tuple(result)
@@ -1476,6 +1559,9 @@ def random_crop_image(image,
                      masks=None,
                      keypoints=None,
                      keypoint_visibilities=None,
                      densepose_num_points=None,
                      densepose_part_ids=None,
                      densepose_surface_coords=None,
                      min_object_covered=1.0,
                      aspect_ratio_range=(0.75, 1.33),
                      area_range=(0.1, 1.0),
@@ -1523,6 +1609,19 @@ def random_crop_image(image,
      normalized coordinates.
    keypoint_visibilities: (optional) rank 2 bool tensor with shape
      [num_instances, num_keypoints].
    densepose_num_points: (optional) rank 1 int32 tensor with shape
      [num_instances] with the number of sampled points per
      instance.
    densepose_part_ids: (optional) rank 2 int32 tensor with shape
      [num_instances, num_points] holding the part id for each
      sampled point. These part_ids are 0-indexed, where the
      first non-background part has index 0.
    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
      [num_instances, num_points, 4]. The DensePose
      coordinates are of the form (y, x, v, u) where
      (y, x) are the normalized image coordinates for a
      sampled point, and (v, u) is the surface
      coordinate for the part.
    min_object_covered: the cropped image must cover at least this fraction of
      at least one of the input bounding boxes.
    aspect_ratio_range: allowed range for aspect ratio of cropped image.
@@ -1547,8 +1646,9 @@ def random_crop_image(image,
      form.
    labels: new labels.

    If label_weights, multiclass_scores, masks, keypoints,
    keypoint_visibilities, densepose_num_points, densepose_part_ids, or
    densepose_surface_coords is not None, the function also returns:
    label_weights: rank 1 float32 tensor with shape [num_instances].
    multiclass_scores: rank 2 float32 tensor with shape
      [num_instances, num_classes]
@@ -1557,7 +1657,12 @@ def random_crop_image(image,
    keypoints: rank 3 float32 tensor with shape
      [num_instances, num_keypoints, 2]
    keypoint_visibilities: rank 2 bool tensor with shape
      [num_instances, num_keypoints]
    densepose_num_points: rank 1 int32 tensor with shape [num_instances].
    densepose_part_ids: rank 2 int32 tensor with shape
      [num_instances, num_points].
    densepose_surface_coords: rank 3 float32 tensor with shape
      [num_instances, num_points, 4].
  """

  def strict_random_crop_image_fn():
@@ -1571,6 +1676,9 @@ def random_crop_image(image,
        masks=masks,
        keypoints=keypoints,
        keypoint_visibilities=keypoint_visibilities,
        densepose_num_points=densepose_num_points,
        densepose_part_ids=densepose_part_ids,
        densepose_surface_coords=densepose_surface_coords,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
@@ -1602,6 +1710,9 @@ def random_crop_image(image,
      outputs.append(keypoints)
    if keypoint_visibilities is not None:
      outputs.append(keypoint_visibilities)
    if densepose_num_points is not None:
      outputs.extend([densepose_num_points, densepose_part_ids,
                      densepose_surface_coords])

    result = tf.cond(do_a_crop_random, strict_random_crop_image_fn,
                     lambda: tuple(outputs))
@@ -1612,6 +1723,7 @@ def random_pad_image(image,
                     boxes,
                     masks=None,
                     keypoints=None,
                     densepose_surface_coords=None,
                     min_image_size=None,
                     max_image_size=None,
                     pad_color=None,
@@ -1639,6 +1751,11 @@ def random_pad_image(image,
    keypoints: (optional) rank 3 float32 tensor with shape
      [N, num_keypoints, 2]. The keypoints are in y-x normalized
      coordinates.
    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
      [N, num_points, 4]. The DensePose coordinates are
      of the form (y, x, v, u) where (y, x) are the
      normalized image coordinates for a sampled point,
      and (v, u) is the surface coordinate for the part.
    min_image_size: a tensor of size [min_height, min_width], type tf.int32.
      If passed as None, will be set to image size
      [height, width].
@@ -1663,6 +1780,9 @@ def random_pad_image(image,
    masks: rank 3 float32 tensor with shape [N, new_height, new_width]
    if keypoints is not None, the function also returns:
    keypoints: rank 3 float32 tensor with shape [N, num_keypoints, 2]
    if densepose_surface_coords is not None, the function also returns:
    densepose_surface_coords: rank 3 float32 tensor with shape
      [num_instances, num_points, 4]
  """
  if pad_color is None:
    pad_color = tf.reduce_mean(image, axis=[0, 1])
@@ -1754,6 +1874,11 @@ def random_pad_image(image,
    new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
    result.append(new_keypoints)

  if densepose_surface_coords is not None:
    new_densepose_surface_coords = densepose_ops.change_coordinate_frame(
        densepose_surface_coords, new_window)
    result.append(new_densepose_surface_coords)

  return tuple(result)
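The coordinate-frame change above touches only the (y, x) image coordinates of each DensePose point; the (v, u) surface coordinates ride along unchanged (the pad test further below asserts exactly this). A NumPy sketch of the assumed math, consistent with the expectations in test_change_coordinate_frame at the top of this page:

def change_coordinate_frame_sketch(dp_surface_coords, window):
  """dp_surface_coords: NumPy [N, P, 4] (y, x, v, u); window: [ymin, xmin, ymax, xmax]."""
  ymin, xmin, ymax, xmax = window
  out = dp_surface_coords.copy()
  out[..., 0] = (out[..., 0] - ymin) / (ymax - ymin)  # re-express y
  out[..., 1] = (out[..., 1] - xmin) / (xmax - xmin)  # re-express x
  return out                                          # (v, u) untouched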
@@ -1761,6 +1886,7 @@ def random_absolute_pad_image(image,
                              boxes,
                              masks=None,
                              keypoints=None,
                              densepose_surface_coords=None,
                              max_height_padding=None,
                              max_width_padding=None,
                              pad_color=None,
@@ -1785,6 +1911,11 @@ def random_absolute_pad_image(image,
    keypoints: (optional) rank 3 float32 tensor with shape
      [N, num_keypoints, 2]. The keypoints are in y-x normalized
      coordinates.
    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
      [N, num_points, 4]. The DensePose coordinates are
      of the form (y, x, v, u) where (y, x) are the
      normalized image coordinates for a sampled point,
      and (v, u) is the surface coordinate for the part.
    max_height_padding: a scalar tf.int32 tensor denoting the maximum amount of
      height padding. The padding will be chosen uniformly at
      random from [0, max_height_padding).
@@ -1817,6 +1948,7 @@ def random_absolute_pad_image(image,
      boxes,
      masks=masks,
      keypoints=keypoints,
      densepose_surface_coords=densepose_surface_coords,
      min_image_size=min_image_size,
      max_image_size=max_image_size,
      pad_color=pad_color,
@@ -3852,7 +3984,7 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
  Args:
    image: rank 3 float32 tensor containing 1 image ->
      [height, width, channels].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
      Boxes are in normalized form meaning their coordinates vary
      between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
@@ -3996,12 +4128,138 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
  return return_values
def random_scale_crop_and_pad_to_square(
    image,
    boxes,
    labels,
    label_weights,
    masks=None,
    keypoints=None,
    scale_min=0.1,
    scale_max=2.0,
    output_size=512,
    resize_method=tf.image.ResizeMethod.BILINEAR,
    seed=None):
  """Randomly scale, crop, and then pad an image to fixed square dimensions.

  Randomly scale, crop, and then pad an image to the desired square output
  dimensions. Specifically, this method first samples a random_scale factor
  from a uniform distribution between scale_min and scale_max, and then resizes
  the image such that its maximum dimension is (output_size * random_scale).
  Secondly, a square output_size crop is extracted from the resized image
  (note, this will only occur when random_scale > 1.0). Lastly, the cropped
  region is padded to the desired square output_size by filling with zeros.
  The augmentation is borrowed from [1]
  [1]: https://arxiv.org/abs/1911.09070

  Args:
    image: rank 3 float32 tensor containing 1 image ->
      [height, width, channels].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
      are in normalized form meaning their coordinates vary between [0, 1].
      Each row is in the form of [ymin, xmin, ymax, xmax]. Boxes on the crop
      boundary are clipped to the boundary and boxes falling outside the crop
      are ignored.
    labels: rank 1 int32 tensor containing the object classes.
    label_weights: float32 tensor of shape [num_instances] representing the
      weight for each box.
    masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
      width] containing instance masks. The masks are of the same height, width
      as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
      num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
    scale_min: float, the minimum value for the random scale factor.
    scale_max: float, the maximum value for the random scale factor.
    output_size: int, the desired (square) output image size.
    resize_method: tf.image.ResizeMethod, resize method to use when scaling the
      input images.
    seed: random seed.

  Returns:
    image: image which is the same rank as input image.
    boxes: boxes which is the same rank as input boxes.
      Boxes are in normalized form.
    labels: new labels.
    label_weights: rank 1 float32 tensor with shape [num_instances].
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
      containing instance masks.
  """
  img_shape = tf.shape(image)
  input_height, input_width = img_shape[0], img_shape[1]
  random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed)

  # Compute the scaled height and width from the random scale.
  max_input_dim = tf.cast(tf.maximum(input_height, input_width), tf.float32)
  input_ar_y = tf.cast(input_height, tf.float32) / max_input_dim
  input_ar_x = tf.cast(input_width, tf.float32) / max_input_dim
  scaled_height = tf.cast(random_scale * output_size * input_ar_y, tf.int32)
  scaled_width = tf.cast(random_scale * output_size * input_ar_x, tf.int32)

  # Compute the offsets:
  offset_y = tf.cast(scaled_height - output_size, tf.float32)
  offset_x = tf.cast(scaled_width - output_size, tf.float32)
  offset_y = tf.maximum(0.0, offset_y) * tf.random_uniform([], 0, 1, seed=seed)
  offset_x = tf.maximum(0.0, offset_x) * tf.random_uniform([], 0, 1, seed=seed)
  offset_y = tf.cast(offset_y, tf.int32)
  offset_x = tf.cast(offset_x, tf.int32)

  # Scale, crop, and pad the input image.
  scaled_image = tf.image.resize_images(
      image, [scaled_height, scaled_width], method=resize_method)
  scaled_image = scaled_image[offset_y:offset_y + output_size,
                              offset_x:offset_x + output_size, :]
  output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0, output_size,
                                              output_size)

  # Update the boxes.
  new_window = tf.cast(
      tf.stack([offset_y, offset_x,
                offset_y + output_size, offset_x + output_size]),
      dtype=tf.float32)
  new_window /= tf.cast(
      tf.stack([scaled_height, scaled_width, scaled_height, scaled_width]),
      dtype=tf.float32)
  boxlist = box_list.BoxList(boxes)
  boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
  boxlist, indices = box_list_ops.prune_completely_outside_window(
      boxlist, [0.0, 0.0, 1.0, 1.0])
  boxlist = box_list_ops.clip_to_window(
      boxlist, [0.0, 0.0, 1.0, 1.0], filter_nonoverlapping=False)

  return_values = [output_image, boxlist.get(),
                   tf.gather(labels, indices),
                   tf.gather(label_weights, indices)]

  if masks is not None:
    new_masks = tf.expand_dims(masks, -1)
    new_masks = tf.image.resize_images(
        new_masks, [scaled_height, scaled_width], method=resize_method)
    new_masks = new_masks[:, offset_y:offset_y + output_size,
                          offset_x:offset_x + output_size, :]
    new_masks = tf.image.pad_to_bounding_box(
        new_masks, 0, 0, output_size, output_size)
    new_masks = tf.squeeze(new_masks, [-1])
    return_values.append(tf.gather(new_masks, indices))

  if keypoints is not None:
    keypoints = tf.gather(keypoints, indices)
    keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
    keypoints = keypoint_ops.prune_outside_window(
        keypoints, [0.0, 0.0, 1.0, 1.0])
    return_values.append(keypoints)

  return return_values
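A worked example of the scaling arithmetic above, under illustrative numbers (512x256 input, output_size=512):

# random_scale = 2.0:
#   max_input_dim = 512; input_ar_y = 1.0; input_ar_x = 0.5
#   scaled_height = 2.0 * 512 * 1.0 = 1024; scaled_width = 2.0 * 512 * 0.5 = 512
#   offset_y ~ Uniform[0, 1024 - 512]; offset_x clamps to 0
#   -> a 512x512 crop slides vertically over the resized image; no padding.
# random_scale = 0.5:
#   scaled_height = 256; scaled_width = 128; both offsets clamp to 0
#   -> no crop; the 256x128 resize is zero-padded to 512x512 at the
#      bottom/right (pad_to_bounding_box places it at offset (0, 0)).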
def get_default_func_arg_map(include_label_weights=True,
                             include_label_confidences=False,
                             include_multiclass_scores=False,
                             include_instance_masks=False,
                             include_keypoints=False,
                             include_keypoint_visibilities=False,
                             include_dense_pose=False):
  """Returns the default mapping from a preprocessor function to its args.

  Args:
@@ -4017,6 +4275,8 @@ def get_default_func_arg_map(include_label_weights=True,
      keypoints, too.
    include_keypoint_visibilities: If True, preprocessing functions will modify
      the keypoint visibilities, too.
    include_dense_pose: If True, preprocessing functions will modify the
      DensePose labels, too.

  Returns:
    A map from preprocessing functions to the arguments they receive.
@@ -4049,6 +4309,17 @@ def get_default_func_arg_map(include_label_weights=True,
  groundtruth_keypoint_visibilities = (
      fields.InputDataFields.groundtruth_keypoint_visibilities)
  groundtruth_dp_num_points = None
  groundtruth_dp_part_ids = None
  groundtruth_dp_surface_coords = None
  if include_dense_pose:
    groundtruth_dp_num_points = (
        fields.InputDataFields.groundtruth_dp_num_points)
    groundtruth_dp_part_ids = (
        fields.InputDataFields.groundtruth_dp_part_ids)
    groundtruth_dp_surface_coords = (
        fields.InputDataFields.groundtruth_dp_surface_coords)
  prep_func_arg_map = {
      normalize_image: (fields.InputDataFields.image,),
      random_horizontal_flip: (
@@ -4057,6 +4328,8 @@ def get_default_func_arg_map(include_label_weights=True,
          groundtruth_instance_masks,
          groundtruth_keypoints,
          groundtruth_keypoint_visibilities,
          groundtruth_dp_part_ids,
          groundtruth_dp_surface_coords,
      ),
      random_vertical_flip: (
          fields.InputDataFields.image,
@@ -4082,21 +4355,22 @@ def get_default_func_arg_map(include_label_weights=True,
      random_adjust_saturation: (fields.InputDataFields.image,),
      random_distort_color: (fields.InputDataFields.image,),
      random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
      random_crop_image:
          (fields.InputDataFields.image,
           fields.InputDataFields.groundtruth_boxes,
           fields.InputDataFields.groundtruth_classes,
           groundtruth_label_weights, groundtruth_label_confidences,
           multiclass_scores, groundtruth_instance_masks, groundtruth_keypoints,
           groundtruth_keypoint_visibilities, groundtruth_dp_num_points,
           groundtruth_dp_part_ids, groundtruth_dp_surface_coords),
      random_pad_image:
          (fields.InputDataFields.image,
           fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
           groundtruth_keypoints, groundtruth_dp_surface_coords),
      random_absolute_pad_image:
          (fields.InputDataFields.image,
           fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
           groundtruth_keypoints, groundtruth_dp_surface_coords),
      random_crop_pad_image: (fields.InputDataFields.image,
                              fields.InputDataFields.groundtruth_boxes,
                              fields.InputDataFields.groundtruth_classes,
@@ -4211,6 +4485,12 @@ def get_default_func_arg_map(include_label_weights=True,
          fields.InputDataFields.groundtruth_classes,
          groundtruth_label_weights, groundtruth_instance_masks,
          groundtruth_keypoints),
      random_scale_crop_and_pad_to_square:
          (fields.InputDataFields.image,
           fields.InputDataFields.groundtruth_boxes,
           fields.InputDataFields.groundtruth_classes,
           groundtruth_label_weights, groundtruth_instance_masks,
           groundtruth_keypoints),
  }
  return prep_func_arg_map
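With the flag wired through, enabling DensePose-aware augmentation from user code is a one-argument change. A minimal sketch, mirroring testRunRandomCropImageWithDensePose below; tensor_dict is assumed to already carry the groundtruth_dp_* fields:

preprocessor_arg_map = preprocessor.get_default_func_arg_map(
    include_dense_pose=True)
preprocessing_options = [(preprocessor.random_crop_image, {})]
tensor_dict = preprocessor.preprocess(
    tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)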
...
@@ -119,6 +119,24 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
    ])
    return tf.constant(keypoints, dtype=tf.float32)

  def createTestDensePose(self):
    dp_num_points = tf.constant([1, 3], dtype=tf.int32)
    dp_part_ids = tf.constant(
        [[4, 0, 0],
         [1, 0, 5]], dtype=tf.int32)
    dp_surface_coords = tf.constant(
        [
            # Instance 0.
            [[0.1, 0.2, 0.6, 0.7],
             [0.0, 0.0, 0.0, 0.0],
             [0.0, 0.0, 0.0, 0.0]],
            # Instance 1.
            [[0.8, 0.9, 0.2, 0.4],
             [0.1, 0.3, 0.2, 0.8],
             [0.6, 1.0, 0.3, 0.4]],
        ], dtype=tf.float32)
    return dp_num_points, dp_part_ids, dp_surface_coords

  def createKeypointFlipPermutation(self):
    return [0, 2, 1]
@@ -694,51 +712,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
        test_masks=True,
        test_keypoints=True)
  def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
    def graph_fn():
      preprocess_options = [(preprocessor.random_horizontal_flip, {})]
      image_height = 3
      image_width = 3
      images = tf.random_uniform([1, image_height, image_width, 3])
      boxes = self.createTestBoxes()
      masks = self.createTestMasks()
      keypoints, keypoint_visibilities = self.createTestKeypoints()
      keypoint_flip_permutation = self.createKeypointFlipPermutation()
      tensor_dict = {
          fields.InputDataFields.image:
              images,
          fields.InputDataFields.groundtruth_boxes:
              boxes,
          fields.InputDataFields.groundtruth_instance_masks:
              masks,
          fields.InputDataFields.groundtruth_keypoints:
              keypoints,
          fields.InputDataFields.groundtruth_keypoint_visibilities:
              keypoint_visibilities
      }
      preprocess_options = [(preprocessor.random_horizontal_flip, {
          'keypoint_flip_permutation': keypoint_flip_permutation
      })]
      preprocessor_arg_map = preprocessor.get_default_func_arg_map(
          include_instance_masks=True,
          include_keypoints=True,
          include_keypoint_visibilities=True)
      tensor_dict = preprocessor.preprocess(
          tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
      boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
      masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
      keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
      keypoint_visibilities = tensor_dict[
          fields.InputDataFields.groundtruth_keypoint_visibilities]
      return [boxes, masks, keypoints, keypoint_visibilities]
    boxes, masks, keypoints, keypoint_visibilities = self.execute_cpu(
        graph_fn, [])
    self.assertIsNotNone(boxes)
    self.assertIsNotNone(masks)
    self.assertIsNotNone(keypoints)
    self.assertIsNotNone(keypoint_visibilities)
  def testRandomVerticalFlip(self):
@@ -1886,6 +1859,65 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
    self.assertAllClose(
        distorted_keypoints_.flatten(), expected_keypoints.flatten())
  def testRunRandomCropImageWithDensePose(self):
    def graph_fn():
      image = self.createColorfulTestImage()
      boxes = self.createTestBoxes()
      labels = self.createTestLabels()
      weights = self.createTestGroundtruthWeights()
      dp_num_points, dp_part_ids, dp_surface_coords = (
          self.createTestDensePose())
      tensor_dict = {
          fields.InputDataFields.image: image,
          fields.InputDataFields.groundtruth_boxes: boxes,
          fields.InputDataFields.groundtruth_classes: labels,
          fields.InputDataFields.groundtruth_weights: weights,
          fields.InputDataFields.groundtruth_dp_num_points: dp_num_points,
          fields.InputDataFields.groundtruth_dp_part_ids: dp_part_ids,
          fields.InputDataFields.groundtruth_dp_surface_coords:
              dp_surface_coords
      }
      preprocessor_arg_map = preprocessor.get_default_func_arg_map(
          include_dense_pose=True)
      preprocessing_options = [(preprocessor.random_crop_image, {})]
      with mock.patch.object(
          tf.image,
          'sample_distorted_bounding_box'
      ) as mock_sample_distorted_bounding_box:
        mock_sample_distorted_bounding_box.return_value = (
            tf.constant([6, 40, 0], dtype=tf.int32),
            tf.constant([134, 340, -1], dtype=tf.int32),
            tf.constant([[[0.03, 0.1, 0.7, 0.95]]], dtype=tf.float32))
        distorted_tensor_dict = preprocessor.preprocess(
            tensor_dict,
            preprocessing_options,
            func_arg_map=preprocessor_arg_map)
        distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
        distorted_dp_num_points = distorted_tensor_dict[
            fields.InputDataFields.groundtruth_dp_num_points]
        distorted_dp_part_ids = distorted_tensor_dict[
            fields.InputDataFields.groundtruth_dp_part_ids]
        distorted_dp_surface_coords = distorted_tensor_dict[
            fields.InputDataFields.groundtruth_dp_surface_coords]
        return [distorted_image, distorted_dp_num_points,
                distorted_dp_part_ids, distorted_dp_surface_coords]
    (distorted_image_, distorted_dp_num_points_, distorted_dp_part_ids_,
     distorted_dp_surface_coords_) = self.execute_cpu(graph_fn, [])

    expected_dp_num_points = np.array([1, 1])
    expected_dp_part_ids = np.array([[4], [0]])
    expected_dp_surface_coords = np.array([
        [[0.10447761, 0.1176470, 0.6, 0.7]],
        [[0.10447761, 0.2352941, 0.2, 0.8]],
    ])
    self.assertAllEqual(distorted_image_.shape, [1, 134, 340, 3])
    self.assertAllEqual(distorted_dp_num_points_, expected_dp_num_points)
    self.assertAllEqual(distorted_dp_part_ids_, expected_dp_part_ids)
    self.assertAllClose(distorted_dp_surface_coords_,
                        expected_dp_surface_coords)
  def testRunRetainBoxesAboveThreshold(self):
    def graph_fn():
      boxes = self.createTestBoxes()
@@ -2276,7 +2308,10 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
    self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
        padded_boxes_[:, 3] - padded_boxes_[:, 1])))
  @parameterized.parameters(
      {'include_dense_pose': False},
  )
  def testRandomPadImageWithKeypointsAndMasks(self, include_dense_pose):
    def graph_fn():
      preprocessing_options = [(preprocessor.normalize_image, {
          'original_minval': 0,
@@ -2290,12 +2325,15 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
      labels = self.createTestLabels()
      masks = self.createTestMasks()
      keypoints, _ = self.createTestKeypoints()
      _, _, dp_surface_coords = self.createTestDensePose()
      tensor_dict = {
          fields.InputDataFields.image: images,
          fields.InputDataFields.groundtruth_boxes: boxes,
          fields.InputDataFields.groundtruth_classes: labels,
          fields.InputDataFields.groundtruth_instance_masks: masks,
          fields.InputDataFields.groundtruth_keypoints: keypoints,
          fields.InputDataFields.groundtruth_dp_surface_coords:
              dp_surface_coords
      }
      tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
      images = tensor_dict[fields.InputDataFields.image]
@@ -2304,7 +2342,8 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
      func_arg_map = preprocessor.get_default_func_arg_map(
          include_instance_masks=True,
          include_keypoints=True,
          include_keypoint_visibilities=True,
          include_dense_pose=include_dense_pose)
      padded_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                   preprocessing_options,
                                                   func_arg_map=func_arg_map)
@@ -2323,15 +2362,29 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
      padded_keypoints_shape = tf.shape(padded_keypoints)
      images_shape = tf.shape(images)
      padded_images_shape = tf.shape(padded_images)
      outputs = [boxes_shape, padded_boxes_shape, padded_masks_shape,
                 keypoints_shape, padded_keypoints_shape, images_shape,
                 padded_images_shape, boxes, padded_boxes, keypoints,
                 padded_keypoints]
      if include_dense_pose:
        padded_dp_surface_coords = padded_tensor_dict[
            fields.InputDataFields.groundtruth_dp_surface_coords]
        outputs.extend([dp_surface_coords, padded_dp_surface_coords])
      return outputs

    outputs = self.execute_cpu(graph_fn, [])
    boxes_shape_ = outputs[0]
    padded_boxes_shape_ = outputs[1]
    padded_masks_shape_ = outputs[2]
    keypoints_shape_ = outputs[3]
    padded_keypoints_shape_ = outputs[4]
    images_shape_ = outputs[5]
    padded_images_shape_ = outputs[6]
    boxes_ = outputs[7]
    padded_boxes_ = outputs[8]
    keypoints_ = outputs[9]
    padded_keypoints_ = outputs[10]
self.assertAllEqual(boxes_shape_, padded_boxes_shape_) self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
self.assertAllEqual(keypoints_shape_, padded_keypoints_shape_) self.assertAllEqual(keypoints_shape_, padded_keypoints_shape_)
self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all) self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
...@@ -2347,6 +2400,11 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
        padded_keypoints_[1, :, 0] - padded_keypoints_[0, :, 0])))
    self.assertTrue(np.all((keypoints_[1, :, 1] - keypoints_[0, :, 1]) >= (
        padded_keypoints_[1, :, 1] - padded_keypoints_[0, :, 1])))
if include_dense_pose:
dp_surface_coords = outputs[11]
padded_dp_surface_coords = outputs[12]
self.assertAllClose(padded_dp_surface_coords[:, :, 2:],
dp_surface_coords[:, :, 2:])
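    # The channel-split assertion above is worth spelling out: DensePose
    # surface coordinates pack (y, x, v, u) per point, and only the
    # image-plane (y, x) half is geometric. A minimal NumPy sketch of the
    # invariant, with invented values (not part of the change):
    #
    #   import numpy as np
    #   dp = np.array([[[0.5, 0.5, 0.3, 0.7]]])  # [instances, points, 4]
    #   scale_y, scale_x = 0.5, 0.8  # original extent / padded extent
    #   padded = dp.copy()
    #   padded[..., 0] *= scale_y    # y shrinks against the padded height
    #   padded[..., 1] *= scale_x    # x shrinks against the padded width
    #   assert np.allclose(padded[..., 2:], dp[..., 2:])  # (v, u) untouched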

  def testRandomAbsolutePadImage(self):
    height_padding = 10
...@@ -3783,6 +3841,90 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
    size = max(image.shape)
    self.assertAlmostEqual(scale * 256.0, size)
self.assertAllClose(image[:, :, 0], masks[0, :, :])
@parameterized.named_parameters(('scale_0_1', 0.1), ('scale_1_0', 1.0),
('scale_2_0', 2.0))
def test_random_scale_crop_and_pad_to_square(self, scale):
def graph_fn():
image = np.random.randn(512, 256, 1)
box_centers = [0.25, 0.5, 0.75]
box_size = 0.1
box_corners = []
box_labels = []
box_label_weights = []
keypoints = []
masks = []
for center_y in box_centers:
for center_x in box_centers:
box_corners.append(
[center_y - box_size / 2.0, center_x - box_size / 2.0,
center_y + box_size / 2.0, center_x + box_size / 2.0])
box_labels.append([1])
box_label_weights.append([1.])
keypoints.append(
[[center_y - box_size / 2.0, center_x - box_size / 2.0],
[center_y + box_size / 2.0, center_x + box_size / 2.0]])
masks.append(image[:, :, 0].reshape(512, 256))
image = tf.constant(image)
boxes = tf.constant(box_corners)
labels = tf.constant(box_labels)
label_weights = tf.constant(box_label_weights)
keypoints = tf.constant(keypoints)
masks = tf.constant(np.stack(masks))
(new_image, new_boxes, _, _, new_masks,
new_keypoints) = preprocessor.random_scale_crop_and_pad_to_square(
image,
boxes,
labels,
label_weights,
masks=masks,
keypoints=keypoints,
scale_min=scale,
scale_max=scale,
output_size=512)
return new_image, new_boxes, new_masks, new_keypoints
image, boxes, masks, keypoints = self.execute_cpu(graph_fn, [])
# Since random_scale_crop_and_pad_to_square may prune and clip boxes,
# we only need to find one of the boxes that was not clipped and check
# that it matches the expected dimensions. Note, assertAlmostEqual(a, b)
# is equivalent to round(a-b, 7) == 0.
any_box_has_correct_size = False
effective_scale_y = int(scale * 512) / 512.0
effective_scale_x = int(scale * 256) / 512.0
expected_size_y = 0.1 * effective_scale_y
expected_size_x = 0.1 * effective_scale_x
for box in boxes:
ymin, xmin, ymax, xmax = box
any_box_has_correct_size |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_box_has_correct_size)
# Similar to the approach above where we check for at least one box with the
# expected dimensions, we check for at least one pair of keypoints whose
# distance matches the expected dimensions.
any_keypoint_pair_has_correct_dist = False
for keypoint_pair in keypoints:
ymin, xmin = keypoint_pair[0]
ymax, xmax = keypoint_pair[1]
any_keypoint_pair_has_correct_dist |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_keypoint_pair_has_correct_dist)
self.assertAlmostEqual(512.0, image.shape[0])
self.assertAlmostEqual(512.0, image.shape[1])
    self.assertAllClose(image[:, :, 0],
                        masks[0, :, :])
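    # A worked instance of the effective-scale arithmetic used above: the
    # 512x256 input is resized by `scale` with truncation to whole pixels,
    # then renormalized by the 512-pixel square output, so the expected box
    # extents are not simply 0.1 * scale. For scale = 0.1:
    #
    #   effective_scale_y = int(0.1 * 512) / 512.0  # 51 / 512 ~= 0.09961
    #   effective_scale_x = int(0.1 * 256) / 512.0  # 25 / 512 ~= 0.04883
    #   expected_size_y = 0.1 * effective_scale_y   # normalized box height
    #   expected_size_x = 0.1 * effective_scale_x   # normalized box width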
...
...@@ -66,6 +66,11 @@ class InputDataFields(object):
    groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
    groundtruth_label_weights: groundtruth label weights.
    groundtruth_weights: groundtruth weight factor for bounding boxes.
groundtruth_dp_num_points: The number of DensePose sampled points for each
instance.
groundtruth_dp_part_ids: Part indices for DensePose points.
groundtruth_dp_surface_coords: Image locations and UV coordinates for
DensePose points.
    num_groundtruth_boxes: number of groundtruth boxes.
    is_annotated: whether an image has been labeled or not.
    true_image_shapes: true shapes of images in the resized images, as resized
...@@ -108,6 +113,9 @@ class InputDataFields(object):
  groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
  groundtruth_label_weights = 'groundtruth_label_weights'
  groundtruth_weights = 'groundtruth_weights'
groundtruth_dp_num_points = 'groundtruth_dp_num_points'
groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
groundtruth_dp_surface_coords = 'groundtruth_dp_surface_coords'
  num_groundtruth_boxes = 'num_groundtruth_boxes'
  is_annotated = 'is_annotated'
  true_image_shape = 'true_image_shape'
...@@ -133,6 +141,8 @@ class DetectionResultFields(object):
      for detection boxes in the image including background class.
    detection_classes: detection-level class labels.
    detection_masks: contains a segmentation mask for each detection box.
detection_surface_coords: contains DensePose surface coordinates for each
box.
    detection_boundaries: contains an object boundary for each detection box.
    detection_keypoints: contains detection keypoints for each detection box.
    detection_keypoint_scores: contains detection keypoint scores.
...@@ -153,6 +163,7 @@ class DetectionResultFields(object):
  detection_features = 'detection_features'
  detection_classes = 'detection_classes'
  detection_masks = 'detection_masks'
detection_surface_coords = 'detection_surface_coords'
  detection_boundaries = 'detection_boundaries'
  detection_keypoints = 'detection_keypoints'
  detection_keypoint_scores = 'detection_keypoint_scores'
...@@ -174,7 +185,11 @@ class BoxListFields(object):
    masks: masks per bounding box.
    boundaries: boundaries per bounding box.
    keypoints: keypoints per bounding box.
keypoint_visibilities: keypoint visibilities per bounding box.
    keypoint_heatmaps: keypoint heatmaps per bounding box.
densepose_num_points: number of DensePose points per bounding box.
densepose_part_ids: DensePose part ids per bounding box.
densepose_surface_coords: DensePose surface coordinates per bounding box.
    is_crowd: is_crowd annotation per bounding box.
  """
  boxes = 'boxes'
...@@ -188,6 +203,9 @@ class BoxListFields(object):
  keypoints = 'keypoints'
  keypoint_visibilities = 'keypoint_visibilities'
  keypoint_heatmaps = 'keypoint_heatmaps'
densepose_num_points = 'densepose_num_points'
densepose_part_ids = 'densepose_part_ids'
densepose_surface_coords = 'densepose_surface_coords'
  is_crowd = 'is_crowd'
  group_of = 'group_of'
...
...@@ -45,6 +45,7 @@ from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import densepose_ops
from object_detection.core import keypoint_ops
from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
...@@ -799,17 +800,15 @@ def get_batch_predictions_from_indices(batch_predictions, indices):
  function.

  Args:
    batch_predictions: A tensor of shape [batch_size, height, width, channels]
      or [batch_size, height, width, class, channels] for class-specific
      features (e.g. keypoint joint offsets).
    indices: A tensor of shape [num_instances, 3] for single class features or
      [num_instances, 4] for class-specific features.

  Returns:
    values: A tensor of shape [num_instances, channels] holding the predicted
      values at the given indices.
  """
  return tf.gather_nd(batch_predictions, indices)
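# Since the docstring above leans entirely on tf.gather_nd, here is a tiny
# self-contained illustration of the single-class case (shapes and values
# invented, not part of the change):
#
#   import tensorflow.compat.v1 as tf
#   # [batch_size=2, height=4, width=4, channels=3] of distinct values.
#   batch_predictions = tf.reshape(
#       tf.range(2 * 4 * 4 * 3, dtype=tf.float32), [2, 4, 4, 3])
#   # One (batch, y, x) triple per instance.
#   indices = tf.constant([[0, 1, 2], [1, 3, 0]])
#   # Gathers the channel vector at each (batch, y, x); result shape [2, 3].
#   values = tf.gather_nd(batch_predictions, indices)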
...@@ -1601,6 +1600,17 @@ class CenterNetKeypointTargetAssigner(object):
    return (batch_indices, batch_offsets, batch_weights)

def _resize_masks(masks, height, width, method):
# Resize segmentation masks to conform to output dimensions. Use TF2
# image resize because TF1's version is buggy:
# https://yaqs.corp.google.com/eng/q/4970450458378240
masks = tf2.image.resize(
masks[:, :, :, tf.newaxis],
size=(height, width),
method=method)
return masks[:, :, :, 0]
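# A usage sketch for the helper, assuming `ResizeMethod` is in scope as it is
# elsewhere in this module (shapes invented):
#
#   masks = tf.ones([2, 8, 8], dtype=tf.float32)  # [num_masks, height, width]
#   small = _resize_masks(masks, 4, 4, method=ResizeMethod.NEAREST_NEIGHBOR)
#   # small has shape [2, 4, 4]; the channel axis added only for
#   # tf2.image.resize is squeezed away again.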
class CenterNetMaskTargetAssigner(object):
  """Wrapper to compute targets for segmentation masks."""
...@@ -1642,13 +1652,9 @@ class CenterNetMaskTargetAssigner(object):
    segmentation_targets_list = []
    for gt_masks, gt_classes in zip(gt_masks_list, gt_classes_list):
      gt_masks = _resize_masks(gt_masks, output_height, output_width,
                               mask_resize_method)
      gt_masks = gt_masks[:, :, :, tf.newaxis]
      gt_classes_reshaped = tf.reshape(gt_classes, [-1, 1, 1, num_classes])
      # Shape: [h, w, num_classes].
      segmentations_for_image = tf.reduce_max(
...@@ -1657,3 +1663,235 @@ class CenterNetMaskTargetAssigner(object):
    segmentation_target = tf.stack(segmentation_targets_list, axis=0)
    return segmentation_target
class CenterNetDensePoseTargetAssigner(object):
"""Wrapper to compute targets for DensePose task."""
def __init__(self, stride, num_parts=24):
self._stride = stride
self._num_parts = num_parts
def assign_part_and_coordinate_targets(self,
height,
width,
gt_dp_num_points_list,
gt_dp_part_ids_list,
gt_dp_surface_coords_list,
gt_weights_list=None):
"""Returns the DensePose part_id and coordinate targets and their indices.
The returned values are expected to be used with predicted tensors
of size (batch_size, height//self._stride, width//self._stride, 2). The
predicted values at the relevant indices can be retrieved with the
get_batch_predictions_from_indices function.
Args:
height: int, height of input to the model. This is used to determine the
height of the output.
width: int, width of the input to the model. This is used to determine the
width of the output.
gt_dp_num_points_list: a list of 1-D tf.int32 tensors of shape [num_boxes]
containing the number of DensePose sampled points per box.
gt_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
[num_boxes, max_sampled_points] containing the DensePose part ids
(0-indexed) for each sampled point. Note that there may be padding, as
boxes may contain a different number of sampled points.
gt_dp_surface_coords_list: a list of 3-D tf.float32 tensors of shape
[num_boxes, max_sampled_points, 4] containing the DensePose surface
coordinates (normalized) for each sampled point. Note that there may be
padding.
gt_weights_list: A list of 1-D tensors with shape [num_boxes]
corresponding to the weight of each groundtruth detection box.
Returns:
batch_indices: an integer tensor of shape [num_total_points, 4] holding
the indices inside the predicted tensor which should be penalized. The
first column indicates the index along the batch dimension and the
second and third columns indicate the index along the y and x
dimensions respectively. The fourth column is the part index.
batch_part_ids: an int tensor of shape [num_total_points, num_parts]
holding 1-hot encodings of parts for each sampled point.
batch_surface_coords: a float tensor of shape [num_total_points, 2]
holding the expected (v, u) coordinates for each sampled point.
batch_weights: a float tensor of shape [num_total_points] indicating the
weight of each prediction.
Note that num_total_points = batch_size * num_boxes * max_sampled_points.
"""
if gt_weights_list is None:
gt_weights_list = [None] * len(gt_dp_num_points_list)
batch_indices = []
batch_part_ids = []
batch_surface_coords = []
batch_weights = []
for i, (num_points, part_ids, surface_coords, weights) in enumerate(
zip(gt_dp_num_points_list, gt_dp_part_ids_list,
gt_dp_surface_coords_list, gt_weights_list)):
num_boxes, max_sampled_points = (
shape_utils.combined_static_and_dynamic_shape(part_ids))
part_ids_flattened = tf.reshape(part_ids, [-1])
part_ids_one_hot = tf.one_hot(part_ids_flattened, depth=self._num_parts)
# Get DensePose coordinates in the output space.
surface_coords_abs = densepose_ops.to_absolute_coordinates(
surface_coords, height // self._stride, width // self._stride)
surface_coords_abs = tf.reshape(surface_coords_abs, [-1, 4])
# Each tensor has shape [num_boxes * max_sampled_points].
yabs, xabs, v, u = tf.unstack(surface_coords_abs, axis=-1)
# Get the indices (in output space) for the DensePose coordinates. Note
# that if self._stride is larger than 1, this will have the effect of
# reducing spatial resolution of the groundtruth points.
indices_y = tf.cast(yabs, tf.int32)
indices_x = tf.cast(xabs, tf.int32)
# Assign ones if weights are not provided.
if weights is None:
weights = tf.ones(num_boxes, dtype=tf.float32)
# Create per-point weights.
weights_per_point = tf.reshape(
tf.tile(weights[:, tf.newaxis], multiples=[1, max_sampled_points]),
shape=[-1])
# Mask out invalid (i.e. padded) DensePose points.
num_points_tiled = tf.tile(num_points[:, tf.newaxis],
multiples=[1, max_sampled_points])
range_tiled = tf.tile(tf.range(max_sampled_points)[tf.newaxis, :],
multiples=[num_boxes, 1])
valid_points = tf.math.less(range_tiled, num_points_tiled)
valid_points = tf.cast(tf.reshape(valid_points, [-1]), dtype=tf.float32)
weights_per_point = weights_per_point * valid_points
# Shape of [num_boxes * max_sampled_points] integer tensor filled with
# current batch index.
batch_index = i * tf.ones_like(indices_y, dtype=tf.int32)
batch_indices.append(
tf.stack([batch_index, indices_y, indices_x, part_ids_flattened],
axis=1))
batch_part_ids.append(part_ids_one_hot)
batch_surface_coords.append(tf.stack([v, u], axis=1))
batch_weights.append(weights_per_point)
batch_indices = tf.concat(batch_indices, axis=0)
batch_part_ids = tf.concat(batch_part_ids, axis=0)
batch_surface_coords = tf.concat(batch_surface_coords, axis=0)
batch_weights = tf.concat(batch_weights, axis=0)
return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
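
# A hedged end-to-end sketch of how these targets pair with
# get_batch_predictions_from_indices; the prediction tensor and its shape are
# hypothetical, everything else uses this module's names:
#
#   assigner = CenterNetDensePoseTargetAssigner(stride=4)
#   indices, part_ids, surface_coords, weights = (
#       assigner.assign_part_and_coordinate_targets(
#           height=120,
#           width=80,
#           gt_dp_num_points_list=[tf.constant([1], dtype=tf.int32)],
#           gt_dp_part_ids_list=[tf.constant([[2, 0]], dtype=tf.int32)],
#           gt_dp_surface_coords_list=[
#               tf.constant([[[0.1, 0.2, 0.3, 0.4],
#                             [0.0, 0.0, 0.0, 0.0]]], dtype=tf.float32)]))
#   # A hypothetical DensePose head output: [batch, 120//4, 80//4, 24, 2].
#   predictions = tf.zeros([1, 30, 20, 24, 2])
#   # Predicted (v, u) at each groundtruth point, [num_total_points, 2];
#   # compare against surface_coords under a loss weighted by `weights`.
#   predicted_vu = get_batch_predictions_from_indices(predictions, indices)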
def filter_mask_overlap_min_area(masks):
"""If a pixel belongs to 2 instances, remove it from the larger instance."""
num_instances = tf.shape(masks)[0]
def _filter_min_area():
"""Helper function to filter non empty masks."""
areas = tf.reduce_sum(masks, axis=[1, 2], keepdims=True)
per_pixel_area = masks * areas
# Make sure background is ignored in argmin.
per_pixel_area = (masks * per_pixel_area +
(1 - masks) * per_pixel_area.dtype.max)
min_index = tf.cast(tf.argmin(per_pixel_area, axis=0), tf.int32)
filtered_masks = (
tf.range(num_instances)[:, tf.newaxis, tf.newaxis]
==
min_index[tf.newaxis, :, :]
)
return tf.cast(filtered_masks, tf.float32) * masks
return tf.cond(num_instances > 0, _filter_min_area,
lambda: masks)
def filter_mask_overlap(masks, method='min_area'):
if method == 'min_area':
return filter_mask_overlap_min_area(masks)
else:
raise ValueError('Unknown mask overlap filter type - {}'.format(method))
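
# A tiny worked example of the min-area rule (mask contents invented): pixels
# covered by both instances are kept by the smaller one.
#
#   import numpy as np
#   big = np.zeros((4, 4), np.float32)
#   big[:3, :3] = 1.0    # area 9
#   small = np.zeros((4, 4), np.float32)
#   small[:2, :2] = 1.0  # area 4
#   masks = tf.constant(np.stack([big, small]))
#   # After filtering, the 2x2 overlap belongs only to `small`; `big` keeps
#   # the remaining L-shaped region.
#   filtered = filter_mask_overlap(masks)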
class CenterNetCornerOffsetTargetAssigner(object):
"""Wrapper to compute corner offsets for boxes using masks."""
def __init__(self, stride, overlap_resolution='min_area'):
"""Initializes the corner offset target assigner.
Args:
stride: int, the stride of the network in output pixels.
overlap_resolution: string, specifies how we handle overlapping
instance masks. Currently only 'min_area' is supported which assigns
overlapping pixels to the instance with the minimum area.
"""
self._stride = stride
self._overlap_resolution = overlap_resolution
def assign_corner_offset_targets(
self, gt_boxes_list, gt_masks_list):
"""Computes the corner offset targets and foreground map.
For each pixel that is part of any object's foreground, this function
computes the relative offsets to the top-left and bottom-right corners of
that instance's bounding box. It also returns a foreground map to indicate
which pixels contain valid corner offsets.
Args:
gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
representing the groundtruth detection bounding boxes for each sample in
the batch. The coordinates are expected in normalized coordinates.
gt_masks_list: A list of float tensors with shape [num_boxes,
input_height, input_width] with values in {0, 1} representing instance
masks for each object.
Returns:
corner_offsets: A float tensor of shape [batch_size, height, width, 4]
containing, in order, the (y, x) offsets to the top left corner and
        the (y, x) offsets to the bottom right corner for each foreground pixel.
foreground: A float tensor of shape [batch_size, height, width] in which
each pixel is set to 1 if it is a part of any instance's foreground
(and thus contains valid corner offsets) and 0 otherwise.
"""
_, input_height, input_width = (
shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0]))
output_height = input_height // self._stride
output_width = input_width // self._stride
y_grid, x_grid = tf.meshgrid(
tf.range(output_height), tf.range(output_width),
indexing='ij')
y_grid, x_grid = tf.cast(y_grid, tf.float32), tf.cast(x_grid, tf.float32)
corner_targets = []
foreground_targets = []
for gt_masks, gt_boxes in zip(gt_masks_list, gt_boxes_list):
gt_masks = _resize_masks(gt_masks, output_height, output_width,
method=ResizeMethod.NEAREST_NEIGHBOR)
gt_masks = filter_mask_overlap(gt_masks, self._overlap_resolution)
ymin, xmin, ymax, xmax = tf.unstack(gt_boxes, axis=1)
ymin, ymax = ymin * output_height, ymax * output_height
xmin, xmax = xmin * output_width, xmax * output_width
top_y = ymin[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
left_x = xmin[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
bottom_y = ymax[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
right_x = xmax[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
foreground_target = tf.cast(tf.reduce_sum(gt_masks, axis=0) > 0.5,
tf.float32)
foreground_targets.append(foreground_target)
corner_target = tf.stack([
tf.reduce_sum(top_y * gt_masks, axis=0),
tf.reduce_sum(left_x * gt_masks, axis=0),
tf.reduce_sum(bottom_y * gt_masks, axis=0),
tf.reduce_sum(right_x * gt_masks, axis=0),
], axis=2)
corner_targets.append(corner_target)
return (tf.stack(corner_targets, axis=0),
tf.stack(foreground_targets, axis=0))
...@@ -1906,6 +1906,274 @@ class CenterNetMaskTargetAssignerTest(test_case.TestCase):
        expected_seg_target, segmentation_target)
class CenterNetDensePoseTargetAssignerTest(test_case.TestCase):
def test_assign_part_and_coordinate_targets(self):
def graph_fn():
gt_dp_num_points_list = [
# Example 0.
tf.constant([2, 0, 3], dtype=tf.int32),
# Example 1.
tf.constant([1, 1], dtype=tf.int32),
]
gt_dp_part_ids_list = [
# Example 0.
tf.constant([[1, 6, 0],
[0, 0, 0],
[0, 2, 3]], dtype=tf.int32),
# Example 1.
tf.constant([[7, 0, 0],
[0, 0, 0]], dtype=tf.int32),
]
gt_dp_surface_coords_list = [
# Example 0.
tf.constant(
[[[0.11, 0.2, 0.3, 0.4], # Box 0.
[0.6, 0.4, 0.1, 0.0],
[0.0, 0.0, 0.0, 0.0]],
[[0.0, 0.0, 0.0, 0.0], # Box 1.
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0]],
[[0.22, 0.1, 0.6, 0.8], # Box 2.
[0.0, 0.4, 0.5, 1.0],
[0.3, 0.2, 0.4, 0.1]]],
dtype=tf.float32),
# Example 1.
tf.constant(
[[[0.5, 0.5, 0.3, 1.0], # Box 0.
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0]],
[[0.2, 0.2, 0.5, 0.8], # Box 1.
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0]]],
dtype=tf.float32),
]
gt_weights_list = [
# Example 0.
tf.constant([1.0, 1.0, 0.5], dtype=tf.float32),
# Example 1.
tf.constant([0.0, 1.0], dtype=tf.float32),
]
cn_assigner = targetassigner.CenterNetDensePoseTargetAssigner(stride=4)
batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
cn_assigner.assign_part_and_coordinate_targets(
height=120,
width=80,
gt_dp_num_points_list=gt_dp_num_points_list,
gt_dp_part_ids_list=gt_dp_part_ids_list,
gt_dp_surface_coords_list=gt_dp_surface_coords_list,
gt_weights_list=gt_weights_list))
return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
self.execute(graph_fn, []))
expected_batch_indices = np.array([
        # Example 0. The first set of indices is calculated as follows:
        # floor(0.11*120/4) = 3, floor(0.2*80/4) = 4.
[0, 3, 4, 1], [0, 18, 8, 6], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, 0, 0], [0, 6, 2, 0], [0, 0, 8, 2], [0, 9, 4, 3],
# Example 1.
[1, 15, 10, 7], [1, 0, 0, 0], [1, 0, 0, 0], [1, 6, 4, 0], [1, 0, 0, 0],
[1, 0, 0, 0]
], dtype=np.int32)
expected_batch_part_ids = tf.one_hot(
[1, 6, 0, 0, 0, 0, 0, 2, 3, 7, 0, 0, 0, 0, 0], depth=24).numpy()
expected_batch_surface_coords = np.array([
# Box 0.
[0.3, 0.4], [0.1, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
[0.6, 0.8], [0.5, 1.0], [0.4, 0.1],
# Box 1.
[0.3, 1.0], [0.0, 0.0], [0.0, 0.0], [0.5, 0.8], [0.0, 0.0], [0.0, 0.0],
], np.float32)
expected_batch_weights = np.array([
# Box 0.
1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5,
# Box 1.
0.0, 0.0, 0.0, 1.0, 0.0, 0.0
], dtype=np.float32)
self.assertAllEqual(expected_batch_indices, batch_indices)
self.assertAllEqual(expected_batch_part_ids, batch_part_ids)
self.assertAllClose(expected_batch_surface_coords, batch_surface_coords)
self.assertAllClose(expected_batch_weights, batch_weights)
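    # The floor arithmetic in the comment above generalizes to every expected
    # index; spot-checking the first one (stride 4 on a 120x80 input gives a
    # 30x20 output grid):
    #
    #   y, x = 0.11, 0.2
    #   int(y * 120 / 4), int(x * 80 / 4)  # -> (3, 4), matching [0, 3, 4, 1]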
class CornerOffsetTargetAssignerTest(test_case.TestCase):
def test_filter_overlap_min_area_empty(self):
"""Test that empty masks work on CPU."""
def graph_fn(masks):
return targetassigner.filter_mask_overlap_min_area(masks)
masks = self.execute_cpu(graph_fn, [np.zeros((0, 5, 5), dtype=np.float32)])
self.assertEqual(masks.shape, (0, 5, 5))
def test_filter_overlap_min_area(self):
"""Test the object with min. area is selected instead of overlap."""
def graph_fn(masks):
return targetassigner.filter_mask_overlap_min_area(masks)
masks = np.zeros((3, 4, 4), dtype=np.float32)
masks[0, :2, :2] = 1.0
masks[1, :3, :3] = 1.0
masks[2, 3, 3] = 1.0
masks = self.execute(graph_fn, [masks])
self.assertAllClose(masks[0],
[[1, 1, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]])
self.assertAllClose(masks[1],
[[0, 0, 1, 0],
[0, 0, 1, 0],
[1, 1, 1, 0],
[0, 0, 0, 0]])
self.assertAllClose(masks[2],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
def test_assign_corner_offset_single_object(self):
"""Test that corner offsets are correct with a single object."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.constant([[0., 0., 1., 1.]])
]
mask = np.zeros((1, 4, 4), dtype=np.float32)
mask[0, 1:3, 1:3] = 1.0
masks = [tf.constant(mask)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute(graph_fn, [])
self.assertAllClose(foreground[0],
[[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 1, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 0],
[[0, 0, 0, 0],
[0, -1, -1, 0],
[0, -2, -2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 1],
[[0, 0, 0, 0],
[0, -1, -2, 0],
[0, -1, -2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 2],
[[0, 0, 0, 0],
[0, 3, 3, 0],
[0, 2, 2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 3],
[[0, 0, 0, 0],
[0, 3, 2, 0],
[0, 3, 2, 0],
[0, 0, 0, 0]])
def test_assign_corner_offset_multiple_objects(self):
"""Test corner offsets are correct with multiple objects."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.constant([[0., 0., 1., 1.], [0., 0., 0., 0.]]),
tf.constant([[0., 0., .25, .25], [.25, .25, 1., 1.]])
]
mask1 = np.zeros((2, 4, 4), dtype=np.float32)
mask1[0, 0, 0] = 1.0
mask1[0, 3, 3] = 1.0
mask2 = np.zeros((2, 4, 4), dtype=np.float32)
mask2[0, :2, :2] = 1.0
mask2[1, 1:, 1:] = 1.0
masks = [tf.constant(mask1), tf.constant(mask2)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute(graph_fn, [])
self.assertEqual(corner_offsets.shape, (2, 4, 4, 4))
self.assertEqual(foreground.shape, (2, 4, 4))
self.assertAllClose(foreground[0],
[[1, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(corner_offsets[0, :, :, 0],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, -3]])
self.assertAllClose(corner_offsets[0, :, :, 1],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, -3]])
self.assertAllClose(corner_offsets[0, :, :, 2],
[[4, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(corner_offsets[0, :, :, 3],
[[4, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(foreground[1],
[[1, 1, 0, 0],
[1, 1, 1, 1],
[0, 1, 1, 1],
[0, 1, 1, 1]])
self.assertAllClose(corner_offsets[1, :, :, 0],
[[0, 0, 0, 0],
[-1, -1, 0, 0],
[0, -1, -1, -1],
[0, -2, -2, -2]])
self.assertAllClose(corner_offsets[1, :, :, 1],
[[0, -1, 0, 0],
[0, -1, -1, -2],
[0, 0, -1, -2],
[0, 0, -1, -2]])
self.assertAllClose(corner_offsets[1, :, :, 2],
[[1, 1, 0, 0],
[0, 0, 3, 3],
[0, 2, 2, 2],
[0, 1, 1, 1]])
self.assertAllClose(corner_offsets[1, :, :, 3],
[[1, 0, 0, 0],
[1, 0, 2, 1],
[0, 3, 2, 1],
[0, 3, 2, 1]])
def test_assign_corner_offsets_no_objects(self):
"""Test assignment works with empty input on cpu."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.zeros((0, 4), dtype=tf.float32)
]
masks = [tf.zeros((0, 5, 5), dtype=tf.float32)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute_cpu(graph_fn, [])
self.assertAllClose(corner_offsets, np.zeros((1, 5, 5, 4)))
self.assertAllClose(foreground, np.zeros((1, 5, 5)))

if __name__ == '__main__':
  tf.enable_v2_behavior()
  tf.test.main()
...@@ -30,6 +30,7 @@ from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import label_map_util
from object_detection.utils import shape_utils
# pylint: disable=g-import-not-at-top
try:
...@@ -170,7 +171,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
               num_additional_channels=0,
               load_multiclass_scores=False,
               load_context_features=False,
               expand_hierarchy_labels=False,
load_dense_pose=False):
"""Constructor sets keys_to_features and items_to_handlers. """Constructor sets keys_to_features and items_to_handlers.
Args: Args:
...@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
account the provided hierarchy in the label_map_proto_file. For positive account the provided hierarchy in the label_map_proto_file. For positive
classes, the labels are extended to ancestor. For negative classes, classes, the labels are extended to ancestor. For negative classes,
the labels are expanded to descendants. the labels are expanded to descendants.
load_dense_pose: Whether to load DensePose annotations.
    Raises:
      ValueError: If `instance_mask_type` option is not one of
...@@ -371,6 +374,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
              self._decode_png_instance_masks))
    else:
      raise ValueError('Did not recognize the `instance_mask_type` option.')
if load_dense_pose:
self.keys_to_features['image/object/densepose/num'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/part_index'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/x'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/y'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/u'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/v'] = (
tf.VarLenFeature(tf.float32))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_num_points] = (
slim_example_decoder.Tensor('image/object/densepose/num'))
self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/part_index',
'image/object/densepose/num'], self._dense_pose_part_indices))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_surface_coords] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/x', 'image/object/densepose/y',
'image/object/densepose/u', 'image/object/densepose/v',
'image/object/densepose/num'],
self._dense_pose_surface_coordinates))
    if label_map_proto_file:
      # If the label_map_proto is provided, try to use it in conjunction with
      # the class text, and fall back to a materialized ID.
...@@ -547,6 +578,14 @@ class TfExampleDecoder(data_decoder.DataDecoder):
      group_of = fields.InputDataFields.groundtruth_group_of
      tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
dtype=tf.int32)
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
dtype=tf.int32)
    return tensor_dict

  def _reshape_keypoints(self, keys_to_tensors):
...@@ -697,6 +736,97 @@ class TfExampleDecoder(data_decoder.DataDecoder):
        lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
        lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
def _dense_pose_part_indices(self, keys_to_tensors):
"""Creates a tensor that contains part indices for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 2-D int32 tensor of shape [num_instances, num_points] where each element
contains the DensePose part index (0-23). The value `num_points`
corresponds to the maximum number of sampled points across all instances
      in the image. Note that instances with fewer sampled points will be padded
with zeros in the last dimension.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
part_index = keys_to_tensors['image/object/densepose/part_index']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(part_index, tf.SparseTensor):
part_index = tf.sparse_tensor_to_dense(part_index)
part_index = tf.cast(part_index, dtype=tf.int32)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_parts_tensor(instance_ind):
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
part_inds = part_index[points_range_start:points_range_end]
return shape_utils.pad_or_clip_nd(part_inds,
output_shape=[max_points_per_instance])
return tf.map_fn(pad_parts_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.int32)
def _dense_pose_surface_coordinates(self, keys_to_tensors):
"""Creates a tensor that contains surface coords for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D float32 tensor of shape [num_instances, num_points, 4] where each
point contains (y, x, v, u) data for each sampled DensePose point. The
(y, x) coordinate has normalized image locations for the point, and (v, u)
contains the surface coordinate (also normalized) for the part. The value
`num_points` corresponds to the maximum number of sampled points across
      all instances in the image. Note that instances with fewer sampled points
will be padded with zeros in dim=1.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
dp_y = keys_to_tensors['image/object/densepose/y']
dp_x = keys_to_tensors['image/object/densepose/x']
dp_v = keys_to_tensors['image/object/densepose/v']
dp_u = keys_to_tensors['image/object/densepose/u']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(dp_y, tf.SparseTensor):
dp_y = tf.sparse_tensor_to_dense(dp_y)
if isinstance(dp_x, tf.SparseTensor):
dp_x = tf.sparse_tensor_to_dense(dp_x)
if isinstance(dp_v, tf.SparseTensor):
dp_v = tf.sparse_tensor_to_dense(dp_v)
if isinstance(dp_u, tf.SparseTensor):
dp_u = tf.sparse_tensor_to_dense(dp_u)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_surface_coordinates_tensor(instance_ind):
"""Pads DensePose surface coordinates for each instance."""
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
y = dp_y[points_range_start:points_range_end]
x = dp_x[points_range_start:points_range_end]
v = dp_v[points_range_start:points_range_end]
u = dp_u[points_range_start:points_range_end]
# Create [num_points_i, 4] tensor, where num_points_i is the number of
# sampled points for instance i.
unpadded_tensor = tf.stack([y, x, v, u], axis=1)
return shape_utils.pad_or_clip_nd(
unpadded_tensor, output_shape=[max_points_per_instance, 4])
return tf.map_fn(pad_surface_coordinates_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.float32)
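
  # Both handlers above implement the same flat-to-padded transformation; a
  # NumPy rendering with invented values may make the layout clearer:
  #
  #   import numpy as np
  #   num = np.array([2, 1])            # densepose/num: points per instance
  #   part_index = np.array([3, 7, 5])  # flat, concatenated over instances
  #   cumulative = np.concatenate([[0], np.cumsum(num)])
  #   padded = np.zeros((len(num), num.max()), dtype=np.int32)
  #   for i in range(len(num)):
  #     inds = part_index[cumulative[i]:cumulative[i + 1]]
  #     padded[i, :len(inds)] = inds
  #   # padded == [[3, 7], [5, 0]]: rows padded to max(num) points, the same
  #   # layout tf.map_fn + shape_utils.pad_or_clip_nd produce above.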
  def _expand_image_label_hierarchy(self, image_classes, image_confidences):
    """Expand image level labels according to the hierarchy.
...
...@@ -1096,8 +1096,8 @@ class TfExampleDecoderTest(test_case.TestCase):
      return example_decoder.decode(tf.convert_to_tensor(example))

    tensor_dict = self.execute_cpu(graph_fn, [])
    self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
                     tensor_dict)

  def testDecodeImageLabels(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
...@@ -1116,8 +1116,7 @@ class TfExampleDecoderTest(test_case.TestCase):
      return example_decoder.decode(tf.convert_to_tensor(example))

    tensor_dict = self.execute_cpu(graph_fn_1, [])
    self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
    self.assertAllEqual(
        tensor_dict[fields.InputDataFields.groundtruth_image_classes],
        np.array([1, 2]))
...@@ -1152,8 +1151,7 @@ class TfExampleDecoderTest(test_case.TestCase):
      return example_decoder.decode(tf.convert_to_tensor(example))

    tensor_dict = self.execute_cpu(graph_fn_2, [])
    self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
    self.assertAllEqual(
        tensor_dict[fields.InputDataFields.groundtruth_image_classes],
        np.array([1, 3]))
...@@ -1345,6 +1343,93 @@ class TfExampleDecoderTest(test_case.TestCase):
        expected_image_confidence,
        tensor_dict[fields.InputDataFields.groundtruth_image_confidences])
def testDecodeDensePose(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg, _ = self._create_encoded_and_decoded_data(
image_tensor, 'jpeg')
bbox_ymins = [0.0, 4.0, 2.0]
bbox_xmins = [1.0, 5.0, 8.0]
bbox_ymaxs = [2.0, 6.0, 1.0]
bbox_xmaxs = [3.0, 7.0, 3.3]
densepose_num = [0, 4, 2]
densepose_part_index = [2, 2, 3, 4, 2, 9]
densepose_x = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
densepose_y = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4]
densepose_u = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
densepose_v = [0.99, 0.98, 0.97, 0.96, 0.95, 0.94]
def graph_fn():
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature(six.b('jpeg')),
'image/object/bbox/ymin':
dataset_util.float_list_feature(bbox_ymins),
'image/object/bbox/xmin':
dataset_util.float_list_feature(bbox_xmins),
'image/object/bbox/ymax':
dataset_util.float_list_feature(bbox_ymaxs),
'image/object/bbox/xmax':
dataset_util.float_list_feature(bbox_xmaxs),
'image/object/densepose/num':
dataset_util.int64_list_feature(densepose_num),
'image/object/densepose/part_index':
dataset_util.int64_list_feature(densepose_part_index),
'image/object/densepose/x':
dataset_util.float_list_feature(densepose_x),
'image/object/densepose/y':
dataset_util.float_list_feature(densepose_y),
'image/object/densepose/u':
dataset_util.float_list_feature(densepose_u),
'image/object/densepose/v':
dataset_util.float_list_feature(densepose_v),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
load_dense_pose=True)
output = example_decoder.decode(tf.convert_to_tensor(example))
dp_num_points = output[fields.InputDataFields.groundtruth_dp_num_points]
dp_part_ids = output[fields.InputDataFields.groundtruth_dp_part_ids]
dp_surface_coords = output[
fields.InputDataFields.groundtruth_dp_surface_coords]
return dp_num_points, dp_part_ids, dp_surface_coords
dp_num_points, dp_part_ids, dp_surface_coords = self.execute_cpu(
graph_fn, [])
expected_dp_num_points = [0, 4, 2]
expected_dp_part_ids = [
[0, 0, 0, 0],
[2, 2, 3, 4],
[2, 9, 0, 0]
]
expected_dp_surface_coords = np.array(
[
# Instance 0 (no points).
[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
# Instance 1 (4 points).
[[0.9, 0.1, 0.99, 0.01],
[0.8, 0.2, 0.98, 0.02],
[0.7, 0.3, 0.97, 0.03],
[0.6, 0.4, 0.96, 0.04]],
# Instance 2 (2 points).
[[0.5, 0.5, 0.95, 0.05],
[0.4, 0.6, 0.94, 0.06],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
], dtype=np.float32)
self.assertAllEqual(dp_num_points, expected_dp_num_points)
self.assertAllEqual(dp_part_ids, expected_dp_part_ids)
self.assertAllClose(dp_surface_coords, expected_dp_surface_coords)

if __name__ == '__main__':
  tf.test.main()
...@@ -43,70 +43,22 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import copy
import datetime
import io
import itertools
import json
import os

import numpy as np
import PIL.Image
import six
import tensorflow.compat.v1 as tf

try:
  import apache_beam as beam  # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
  pass


class ReKeyDataFn(beam.DoFn):
...@@ -406,7 +358,8 @@ class GenerateContextFn(beam.DoFn):
               keep_only_positives_gt=False,
               max_num_elements_in_context_features=5000,
               pad_context_features=False,
               output_type='tf_example', max_clip_length=None,
               context_feature_length=2057):
    """Initialization function.

    Args:
...@@ -432,6 +385,8 @@ class GenerateContextFn(beam.DoFn):
      output_type: What type of output, tf_example or tf_sequence_example
      max_clip_length: The maximum length of a sequence example, before
        it is split into multiple examples.
      context_feature_length: The length of the context feature embeddings
        stored in the input data.
""" """
self._session = None self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter( self._num_examples_processed = beam.metrics.Metrics.counter(
...@@ -456,6 +411,7 @@ class GenerateContextFn(beam.DoFn): ...@@ -456,6 +411,7 @@ class GenerateContextFn(beam.DoFn):
self._context_features_score_threshold = context_features_score_threshold self._context_features_score_threshold = context_features_score_threshold
self._max_num_elements_in_context_features = ( self._max_num_elements_in_context_features = (
max_num_elements_in_context_features) max_num_elements_in_context_features)
self._context_feature_length = context_feature_length
self._images_kept = beam.metrics.Metrics.counter( self._images_kept = beam.metrics.Metrics.counter(
'sequence_data_generation', 'images_kept') 'sequence_data_generation', 'images_kept')
...@@ -506,9 +462,9 @@ class GenerateContextFn(beam.DoFn):
        context_features_image_id_list.append(example_image_id)

    if not example_embedding:
      example_embedding.append(np.zeros(self._context_feature_length))

    feature_length = self._context_feature_length
    # If the example_list is not empty and image/embedding_length is in the
    # feature dict, feature_length will be assigned to that. Otherwise, it will
...@@ -703,7 +659,8 @@ class GenerateContextFn(beam.DoFn):
    return list_of_examples
def construct_pipeline(pipeline,
                       input_tfrecord,
                       output_tfrecord,
                       sequence_key,
                       time_horizon=None,
...@@ -720,10 +677,12 @@ def construct_pipeline(input_tfrecord,
                       max_num_elements_in_context_features=5000,
                       num_shards=0,
                       output_type='tf_example',
                       max_clip_length=None,
                       context_feature_length=2057):
  """Returns a beam pipeline to run object detection inference.

  Args:
    pipeline: Initialized beam pipeline.
    input_tfrecord: A TFRecord of tf.train.Example protos containing images.
    output_tfrecord: A TFRecord of tf.train.Example protos that contain images
      in the input TFRecord and the detections from the model.
...@@ -755,91 +714,224 @@ def construct_pipeline(input_tfrecord,
    output_type: What type of output, tf_example or tf_sequence_example
    max_clip_length: The maximum length of a sequence example, before
      it is split into multiple examples.
    context_feature_length: The length of the context feature embeddings stored
      in the input data.
  """
  if output_type == 'tf_example':
    coder = beam.coders.ProtoCoder(tf.train.Example)
  elif output_type == 'tf_sequence_example':
    coder = beam.coders.ProtoCoder(tf.train.SequenceExample)
  else:
    raise ValueError('Unsupported output type.')
  input_collection = (
      pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
          input_tfrecord,
          coder=beam.coders.BytesCoder()))
  rekey_collection = input_collection | 'RekeyExamples' >> beam.ParDo(
      ReKeyDataFn(sequence_key, time_horizon,
                  reduce_image_size, max_image_dimension))
  grouped_collection = (
      rekey_collection | 'GroupBySequenceKey' >> beam.GroupByKey())
  grouped_collection = (
      grouped_collection | 'ReshuffleGroups' >> beam.Reshuffle())
  ordered_collection = (
      grouped_collection | 'OrderByFrameNumber' >> beam.ParDo(
          SortGroupedDataFn(sequence_key, sorted_image_ids,
                            max_num_elements_in_context_features)))
  ordered_collection = (
      ordered_collection | 'ReshuffleSortedGroups' >> beam.Reshuffle())
  output_collection = (
      ordered_collection | 'AddContextToExamples' >> beam.ParDo(
          GenerateContextFn(
              sequence_key, add_context_features, image_ids_to_keep,
              keep_context_features_image_id_list=(
                  keep_context_features_image_id_list),
              subsample_context_features_rate=subsample_context_features_rate,
              keep_only_positives=keep_only_positives,
              keep_only_positives_gt=keep_only_positives_gt,
              context_features_score_threshold=(
                  context_features_score_threshold),
              max_num_elements_in_context_features=(
                  max_num_elements_in_context_features),
              output_type=output_type,
              max_clip_length=max_clip_length,
              context_feature_length=context_feature_length)))
  output_collection = (
      output_collection | 'ReshuffleExamples' >> beam.Reshuffle())
  _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
      output_tfrecord,
      num_shards=num_shards,
      coder=coder)
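
# With the pipeline object now passed in rather than constructed inside, a
# caller presumably drives it along these lines; the runner options and paths
# here are invented, and the real entry point wires them up from parse_args
# below:
#
#   from apache_beam.options.pipeline_options import PipelineOptions
#   p = beam.Pipeline(options=PipelineOptions())
#   construct_pipeline(
#       p,
#       input_tfrecord='/tmp/input.tfrecord',
#       output_tfrecord='/tmp/output.tfrecord',
#       sequence_key='image/location')
#   p.run().wait_until_finish()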


def parse_args(argv):
  """Command-line argument parser.

  Args:
    argv: command line arguments
  Returns:
    beam_args: Arguments for the beam pipeline.
    pipeline_args: Arguments for the pipeline options, such as runner type.
  """
  parser = argparse.ArgumentParser()
      '--input_tfrecord',
      dest='input_tfrecord',
      required=True,
      help='TFRecord containing images in tf.Example format for object '
      'detection, with bounding boxes and contextual feature embeddings.')
  parser.add_argument(
      '--output_tfrecord',
      dest='output_tfrecord',
      required=True,
      help='TFRecord containing images in tf.Example format, with added '
      'contextual memory banks.')
  parser.add_argument(
      '--sequence_key',
      dest='sequence_key',
      default='image/location',
      help='Key to use when grouping sequences: so far supports `image/seq_id` '
      'and `image/location`.')
  parser.add_argument(
      '--context_feature_length',
      dest='context_feature_length',
      default=2057,
      help='The length of the context feature embeddings stored in the input '
      'data.')
  parser.add_argument(
      '--time_horizon',
      dest='time_horizon',
      default=None,
      help='What time horizon to use when splitting the data, if any. Options '
      'are: `year`, `month`, `week`, `day`, `hour`, `minute`, `None`.')
  parser.add_argument(
      '--subsample_context_features_rate',
      dest='subsample_context_features_rate',
      default=0,
      help='The rate at which to subsample the context_features. If set to X, '
      'context is sampled from 1 out of every X images. The default, X=0, '
      'samples context from every image.')
  parser.add_argument(
      '--reduce_image_size',
      dest='reduce_image_size',
      default=True,
      help='Downsamples images so that the longest side is at most '
      'max_image_dimension, maintaining aspect ratio.')
  parser.add_argument(
      '--max_image_dimension',
      dest='max_image_dimension',
      default=1024,
      help='Sets max image dimension for resizing.')
  parser.add_argument(
      '--add_context_features',
      dest='add_context_features',
      default=True,
      help='Adds a memory bank of embeddings to each clip.')
  parser.add_argument(
      '--sorted_image_ids',
      dest='sorted_image_ids',
      default=True,
      help='Whether the image source_ids are sortable to deal with '
      'date_captured tie-breaks.')
  parser.add_argument(
      '--image_ids_to_keep',
      dest='image_ids_to_keep',
      default='All',
      help='Path to .json list of image ids to keep, used for ground truth '
      'eval creation.')
  parser.add_argument(
      '--keep_context_features_image_id_list',
      dest='keep_context_features_image_id_list',
      default=False,
      help='Whether or not to keep a list of the image_ids corresponding to '
      'the memory bank.')
  parser.add_argument(
      '--keep_only_positives',
      dest='keep_only_positives',
      default=False,
      help='Whether or not to keep only positive boxes based on score.')
  parser.add_argument(
      '--context_features_score_threshold',
      dest='context_features_score_threshold',
      default=0.7,
      help='What score threshold to use for boxes in context_features, when '
      '`keep_only_positives` is set to `True`.')
  parser.add_argument(
      '--keep_only_positives_gt',
      dest='keep_only_positives_gt',
      default=False,
      help='Whether or not to keep only positive boxes based on gt class.')
  parser.add_argument(
      '--max_num_elements_in_context_features',
      dest='max_num_elements_in_context_features',
      default=2000,
      help='Sets max number of context feature elements per memory bank. '
      'If the number of images in the context group is greater than '
      '`max_num_elements_in_context_features`, the context group will be '
      'split.')
  parser.add_argument(
      '--output_type',
      dest='output_type',
      default='tf_example',
      help='Output type, one of `tf_example`, `tf_sequence_example`.')
  parser.add_argument(
      '--max_clip_length',
      dest='max_clip_length',
      default=None,
      help='Max length for sequence example outputs.')
  parser.add_argument(
      '--num_shards',
      dest='num_shards',
      default=0,
      help='Number of output shards.')
  beam_args, pipeline_args = parser.parse_known_args(argv)
  return beam_args, pipeline_args
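# Aside: parse_known_args splits recognized script flags from everything else,
# so runner options pass straight through to Beam. A small sketch (hypothetical
# paths and runner flag, for illustration only):
#
#   beam_args, pipeline_args = parse_args([
#       '--input_tfrecord=/tmp/in.tfrecord',
#       '--output_tfrecord=/tmp/out.tfrecord',
#       '--runner=DirectRunner'])
#   # beam_args.input_tfrecord -> '/tmp/in.tfrecord'
#   # pipeline_args            -> ['--runner=DirectRunner']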
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference.
dirname = os.path.dirname(FLAGS.output_tfrecord) Args:
argv: Command line arguments.
save_main_session: Whether to save the main session.
"""
args, pipeline_args = parse_args(argv)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(args.output_tfrecord)
tf.io.gfile.makedirs(dirname) tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.input_tfrecord, p = beam.Pipeline(options=pipeline_options)
FLAGS.output_tfrecord,
FLAGS.sequence_key, construct_pipeline(
FLAGS.time_horizon, p,
FLAGS.subsample_context_features_rate, args.input_tfrecord,
FLAGS.reduce_image_size, args.output_tfrecord,
FLAGS.max_image_dimension, args.sequence_key,
FLAGS.add_context_features, args.time_horizon,
FLAGS.sorted_image_ids, args.subsample_context_features_rate,
FLAGS.image_ids_to_keep, args.reduce_image_size,
FLAGS.keep_context_features_image_id_list, args.max_image_dimension,
FLAGS.keep_only_positives, args.add_context_features,
FLAGS.context_features_score_threshold, args.sorted_image_ids,
FLAGS.keep_only_positives_gt, args.image_ids_to_keep,
FLAGS.max_num_elements_in_context_features, args.keep_context_features_image_id_list,
FLAGS.num_shards, args.keep_only_positives,
FLAGS.output_type, args.context_features_score_threshold,
FLAGS.max_clip_length)) args.keep_only_positives_gt,
args.max_num_elements_in_context_features,
args.output_type,
args.max_clip_length,
args.context_feature_length)
p.run()
if __name__ == '__main__': if __name__ == '__main__':
flags.mark_flags_as_required([ main()
'input_tfrecord',
'output_tfrecord'
])
app.run(main)
@@ -22,13 +22,19 @@ import datetime
import os
import tempfile
import unittest

import numpy as np
import six
import tensorflow.compat.v1 as tf

from object_detection.dataset_tools.context_rcnn import add_context_to_examples
from object_detection.utils import tf_version

try:
  import apache_beam as beam  # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
  pass
@contextlib.contextmanager
@@ -200,7 +206,7 @@ class GenerateContextDataTest(tf.test.TestCase):
        seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])

  def assert_expected_key(self, key):
    self.assertAllEqual(key, b'01')

  def assert_sorted(self, example_collection):
    example_list = list(example_collection)
@@ -329,19 +335,22 @@ class GenerateContextDataTest(tf.test.TestCase):
    with InMemoryTFRecord(
        [self._create_first_tf_example(),
         self._create_second_tf_example()]) as input_tfrecord:
      temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
      output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
      sequence_key = six.ensure_binary('image/seq_id')
      max_num_elements = 10
      num_shards = 1
      pipeline_options = beam.options.pipeline_options.PipelineOptions(
          runner='DirectRunner')
      p = beam.Pipeline(options=pipeline_options)
      add_context_to_examples.construct_pipeline(
          p,
          input_tfrecord,
          output_tfrecord,
          sequence_key,
          max_num_elements_in_context_features=max_num_elements,
          num_shards=num_shards)
      p.run()
      filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
      actual_output = []
      record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
@@ -355,20 +364,23 @@ class GenerateContextDataTest(tf.test.TestCase):
    with InMemoryTFRecord(
        [self._create_first_tf_example(),
         self._create_second_tf_example()]) as input_tfrecord:
      temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
      output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
      sequence_key = six.ensure_binary('image/seq_id')
      max_num_elements = 10
      num_shards = 1
      pipeline_options = beam.options.pipeline_options.PipelineOptions(
          runner='DirectRunner')
      p = beam.Pipeline(options=pipeline_options)
      add_context_to_examples.construct_pipeline(
          p,
          input_tfrecord,
          output_tfrecord,
          sequence_key,
          max_num_elements_in_context_features=max_num_elements,
          num_shards=num_shards,
          output_type='tf_sequence_example')
      p.run()
      filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
      actual_output = []
      record_iterator = tf.python_io.tf_record_iterator(
...
@@ -33,31 +33,21 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import hashlib
import io
import json
import logging
import os

import numpy as np
import PIL.Image
import tensorflow.compat.v1 as tf

from object_detection.utils import dataset_util

try:
  import apache_beam as beam  # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
  pass
class ParseImage(beam.DoFn):
@@ -243,13 +233,14 @@ class ParseImage(beam.DoFn):
    return [example]


def load_json_data(data_file):
  with tf.io.gfile.GFile(data_file, 'r') as fid:
    data_dict = json.load(fid)
  return data_dict


def create_pipeline(pipeline,
                    image_directory,
                    input_annotations_file,
                    output_tfrecord_prefix=None,
                    num_images_per_shard=200,
@@ -257,68 +248,97 @@ def create_pipeline(image_directory,
  """Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.

  Args:
    pipeline: Initialized beam pipeline.
    image_directory: Path to image directory
    input_annotations_file: Path to a coco-cameratraps annotation file
    output_tfrecord_prefix: Absolute path for tfrecord outputs. Final files will
      be named {output_tfrecord_prefix}@N.
    num_images_per_shard: The number of images to store in each shard
    keep_bboxes: Whether to keep any bounding boxes that exist in the json file
  """
  logging.info('Reading data from COCO-CameraTraps Dataset.')
  data = load_json_data(input_annotations_file)
  num_shards = int(np.ceil(float(len(data['images']))/num_images_per_shard))
  image_examples = (
      pipeline | ('CreateCollections') >> beam.Create(
          [im['id'] for im in data['images']])
      | ('ParseImage') >> beam.ParDo(ParseImage(
          image_directory, data['images'], data['annotations'],
          data['categories'], keep_bboxes=keep_bboxes)))
  _ = (image_examples
       | ('Reshuffle') >> beam.Reshuffle()
       | ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
           output_tfrecord_prefix,
           num_shards=num_shards,
           coder=beam.coders.ProtoCoder(tf.train.Example)))
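# Worked example of the shard count above (toy numbers, not from a real
# annotation file): 1001 images at num_images_per_shard=200 gives
# int(np.ceil(1001 / 200.)) == 6 output shards.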
def parse_args(argv):
  """Command-line argument parser.

  Args:
    argv: command line arguments
  Returns:
    beam_args: Arguments for the beam pipeline.
    pipeline_args: Arguments for the pipeline options, such as runner type.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--image_directory',
      dest='image_directory',
      required=True,
      help='Path to the directory where the images are stored.')
  parser.add_argument(
      '--output_tfrecord_prefix',
      dest='output_tfrecord_prefix',
      required=True,
      help='Path and prefix to store TFRecords containing images in '
      'tf.Example format.')
  parser.add_argument(
      '--input_annotations_file',
      dest='input_annotations_file',
      required=True,
      help='Path to Coco-CameraTraps style annotations file.')
  parser.add_argument(
      '--num_images_per_shard',
      dest='num_images_per_shard',
      default=200,
      help='The number of images to be stored in each output shard.')
  beam_args, pipeline_args = parser.parse_known_args(argv)
  return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
  """Runs the Beam pipeline that produces the COCO-CameraTraps dataset.

  Args:
    argv: Command line arguments.
    save_main_session: Whether to save the main session.
  """
  args, pipeline_args = parse_args(argv)

  pipeline_options = beam.options.pipeline_options.PipelineOptions(
      pipeline_args)
  pipeline_options.view_as(
      beam.options.pipeline_options.SetupOptions).save_main_session = (
          save_main_session)

  dirname = os.path.dirname(args.output_tfrecord_prefix)
  tf.io.gfile.makedirs(dirname)

  p = beam.Pipeline(options=pipeline_options)
  create_pipeline(
      pipeline=p,
      image_directory=args.image_directory,
      input_annotations_file=args.input_annotations_file,
      output_tfrecord_prefix=args.output_tfrecord_prefix,
      num_images_per_shard=args.num_images_per_shard)
  p.run()


if __name__ == '__main__':
  main()
@@ -21,13 +21,18 @@ import json
import os
import tempfile
import unittest

import numpy as np
from PIL import Image
import tensorflow.compat.v1 as tf

from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
from object_detection.utils import tf_version

try:
  import apache_beam as beam  # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
  pass
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
@@ -95,13 +100,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
        .int64_list.value, [1])
    self.assertAllEqual(
        example.features.feature['image/object/class/text']
        .bytes_list.value, [b'animal'])
    self.assertAllClose(
        example.features.feature['image/class/label']
        .int64_list.value, [1])
    self.assertAllEqual(
        example.features.feature['image/class/text']
        .bytes_list.value, [b'animal'])

    # Check other essential attributes.
    self.assertAllEqual(
@@ -112,7 +117,7 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
        [self.IMAGE_WIDTH])
    self.assertAllEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [b'im_0'])
    self.assertTrue(
        example.features.feature['image/encoded'].bytes_list.value)
@@ -134,13 +139,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
        .int64_list.value, [1])
    self.assertAllEqual(
        example.features.feature['image/object/class/text']
        .bytes_list.value, [b'animal'])
    self.assertAllClose(
        example.features.feature['image/class/label']
        .int64_list.value, [1])
    self.assertAllEqual(
        example.features.feature['image/class/text']
        .bytes_list.value, [b'animal'])

    # Check other essential attributes.
    self.assertAllEqual(
@@ -151,21 +156,23 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
        [self.IMAGE_WIDTH])
    self.assertAllEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [b'im_0'])
    self.assertTrue(
        example.features.feature['image/encoded'].bytes_list.value)
  def test_beam_pipeline(self):
    num_frames = 1
    temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
    json_path = self._create_json_file(temp_dir, num_frames)
    output_tfrecord = temp_dir+'/output'
    self._write_random_images_to_directory(temp_dir, num_frames)
    pipeline_options = beam.options.pipeline_options.PipelineOptions(
        runner='DirectRunner')
    p = beam.Pipeline(options=pipeline_options)
    create_cococameratraps_tfexample_main.create_pipeline(
        p, temp_dir, json_path,
        output_tfrecord_prefix=output_tfrecord)
    p.run()
    filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
    actual_output = []
    record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
@@ -176,17 +183,19 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
        actual_output[0]))

  def test_beam_pipeline_bbox(self):
    num_frames = 1
    temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
    json_path = self._create_json_file(temp_dir, num_frames, keep_bboxes=True)
    output_tfrecord = temp_dir+'/output'
    self._write_random_images_to_directory(temp_dir, num_frames)
    pipeline_options = beam.options.pipeline_options.PipelineOptions(
        runner='DirectRunner')
    p = beam.Pipeline(options=pipeline_options)
    create_cococameratraps_tfexample_main.create_pipeline(
        p, temp_dir, json_path,
        output_tfrecord_prefix=output_tfrecord,
        keep_bboxes=True)
    p.run()
    filenames = tf.io.gfile.glob(output_tfrecord+'-?????-of-?????')
    actual_output = []
    record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
@@ -45,26 +45,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import threading

import tensorflow.compat.v1 as tf

try:
  import apache_beam as beam  # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
  pass
class GenerateDetectionDataFn(beam.DoFn):
@@ -205,58 +193,103 @@ class GenerateDetectionDataFn(beam.DoFn):
    return [example]


def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
                       confidence_threshold, num_shards):
  """Constructs a Beam pipeline to run object detection inference.

  Args:
    pipeline: Initialized beam pipeline.
    input_tfrecord: A TFRecord of tf.train.Example protos containing images.
    output_tfrecord: A TFRecord of tf.train.Example protos that contain images
      in the input TFRecord and the detections from the model.
    model_dir: Path to `saved_model` to use for inference.
    confidence_threshold: Threshold to use when keeping detection results.
    num_shards: The number of output shards.
  """
  input_collection = (
      pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
          input_tfrecord,
          coder=beam.coders.BytesCoder()))
  output_collection = input_collection | 'RunInference' >> beam.ParDo(
      GenerateDetectionDataFn(model_dir, confidence_threshold))
  output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
  _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
      output_tfrecord,
      num_shards=num_shards,
      coder=beam.coders.ProtoCoder(tf.train.Example))
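# Aside: beam.coders.ProtoCoder simply (de)serializes the proto, so the write
# above stores each tf.train.Example via SerializeToString(). A toy round-trip
# sketch (values assumed, for illustration only):
#
#   example = tf.train.Example()
#   assert tf.train.Example.FromString(example.SerializeToString()) == example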
def parse_args(argv):
  """Command-line argument parser.

  Args:
    argv: command line arguments
  Returns:
    beam_args: Arguments for the beam pipeline.
    pipeline_args: Arguments for the pipeline options, such as runner type.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--detection_input_tfrecord',
      dest='detection_input_tfrecord',
      required=True,
      help='TFRecord containing images in tf.Example format for object '
      'detection.')
  parser.add_argument(
      '--detection_output_tfrecord',
      dest='detection_output_tfrecord',
      required=True,
      help='TFRecord containing detections in tf.Example format.')
  parser.add_argument(
      '--detection_model_dir',
      dest='detection_model_dir',
      required=True,
      help='Path to directory containing an object detection SavedModel.')
  parser.add_argument(
      '--confidence_threshold',
      dest='confidence_threshold',
      default=0.9,
      help='Min confidence to keep bounding boxes.')
  parser.add_argument(
      '--num_shards',
      dest='num_shards',
      default=0,
      help='Number of output shards.')
  beam_args, pipeline_args = parser.parse_known_args(argv)
  return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference. """Runs the Beam pipeline that performs inference.
Args: Args:
_: unused argv: Command line arguments.
save_main_session: Whether to save the main session.
""" """
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.detection_output_tfrecord) args, pipeline_args = parse_args(argv)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(args.detection_output_tfrecord)
tf.io.gfile.makedirs(dirname) tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.detection_input_tfrecord, p = beam.Pipeline(options=pipeline_options)
FLAGS.detection_output_tfrecord,
FLAGS.detection_model_dir, construct_pipeline(
FLAGS.confidence_threshold, p,
FLAGS.num_shards)) args.detection_input_tfrecord,
args.detection_output_tfrecord,
args.detection_model_dir,
args.confidence_threshold,
args.num_shards)
p.run()
if __name__ == '__main__': if __name__ == '__main__':
flags.mark_flags_as_required([ main()
'detection_input_tfrecord',
'detection_output_tfrecord',
'detection_model_dir'
])
app.run(main)
@@ -32,13 +32,17 @@ from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_detection_data
from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version

if six.PY2:
  import mock  # pylint: disable=g-import-not-at-top
else:
  mock = unittest.mock

try:
  import apache_beam as beam  # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
  pass
class FakeModel(model.DetectionModel):
  """A Fake Detection model with expected output nodes from post-processing."""
@@ -67,6 +71,9 @@ class FakeModel(model.DetectionModel):
  def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
    pass

  def restore_from_objects(self, fine_tune_checkpoint_type):
    pass

  def loss(self, prediction_dict, true_image_shapes):
    pass
@@ -243,16 +250,18 @@ class GenerateDetectionDataTest(tf.test.TestCase):
  def test_beam_pipeline(self):
    with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
      temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
      output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
      saved_model_path = self._export_saved_model()
      confidence_threshold = 0.8
      num_shards = 1
      pipeline_options = beam.options.pipeline_options.PipelineOptions(
          runner='DirectRunner')
      p = beam.Pipeline(options=pipeline_options)
      generate_detection_data.construct_pipeline(
          p, input_tfrecord, output_tfrecord, saved_model_path,
          confidence_threshold, num_shards)
      p.run()
      filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
      actual_output = []
      record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
@@ -34,7 +34,8 @@ python tensorflow_models/object_detection/export_inference_graph.py \
    --input_type tf_example \
    --pipeline_config_path path/to/faster_rcnn_model.config \
    --trained_checkpoint_prefix path/to/model.ckpt \
    --output_directory path/to/exported_model_directory \
    --additional_output_tensor_names detection_features

python generate_embedding_data.py \
    --alsologtostderr \
@@ -47,34 +48,19 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import datetime
import os
import threading

import numpy as np
import six
import tensorflow.compat.v1 as tf

try:
  import apache_beam as beam  # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
  pass
class GenerateEmbeddingDataFn(beam.DoFn):
@@ -321,12 +307,13 @@ class GenerateEmbeddingDataFn(beam.DoFn):
    return [example]


def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
                       top_k_embedding_count, bottom_k_embedding_count,
                       num_shards):
  """Constructs a beam pipeline to run object detection inference.

  Args:
    pipeline: Initialized beam pipeline.
    input_tfrecord: A TFRecord of tf.train.Example protos containing images.
    output_tfrecord: A TFRecord of tf.train.Example protos that contain images
      in the input TFRecord and the detections from the model.
@@ -335,44 +322,98 @@ def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
    bottom_k_embedding_count: The number of low-confidence embeddings to store.
    num_shards: The number of output shards.
  """
  input_collection = (
      pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
          input_tfrecord,
          coder=beam.coders.BytesCoder()))
  output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
      GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
                              bottom_k_embedding_count))
  output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
  _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
      output_tfrecord,
      num_shards=num_shards,
      coder=beam.coders.ProtoCoder(tf.train.Example))
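# Aside (toy scores, assumed purely for illustration; the real selection
# happens inside GenerateEmbeddingDataFn): picking top-k / bottom-k detections
# by confidence amounts to:
#
#   scores = np.array([0.9, 0.2, 0.7, 0.4])
#   order = np.argsort(-scores)  # indices sorted by descending score
#   top = order[:top_k_embedding_count]
#   bottom = order[len(order) - bottom_k_embedding_count:]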
def parse_args(argv):
  """Command-line argument parser.

  Args:
    argv: command line arguments
  Returns:
    beam_args: Arguments for the beam pipeline.
    pipeline_args: Arguments for the pipeline options, such as runner type.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--embedding_input_tfrecord',
      dest='embedding_input_tfrecord',
      required=True,
      help='TFRecord containing images in tf.Example format for object '
      'detection.')
  parser.add_argument(
      '--embedding_output_tfrecord',
      dest='embedding_output_tfrecord',
      required=True,
      help='TFRecord containing embeddings in tf.Example format.')
  parser.add_argument(
      '--embedding_model_dir',
      dest='embedding_model_dir',
      required=True,
      help='Path to directory containing an object detection SavedModel with '
      'detection_box_classifier_features in the output.')
  parser.add_argument(
      '--top_k_embedding_count',
      dest='top_k_embedding_count',
      default=1,
      help='The number of top k embeddings to add to the memory bank.')
  parser.add_argument(
      '--bottom_k_embedding_count',
      dest='bottom_k_embedding_count',
      default=0,
      help='The number of bottom k embeddings to add to the memory bank.')
  parser.add_argument(
      '--num_shards',
      dest='num_shards',
      default=0,
      help='Number of output shards.')
  beam_args, pipeline_args = parser.parse_known_args(argv)
  return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference. """Runs the Beam pipeline that performs inference.
Args: Args:
_: unused argv: Command line arguments.
save_main_session: Whether to save the main session.
""" """
# must create before flags are used args, pipeline_args = parse_args(argv)
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.embedding_output_tfrecord) pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(args.embedding_output_tfrecord)
tf.io.gfile.makedirs(dirname) tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.embedding_input_tfrecord, p = beam.Pipeline(options=pipeline_options)
FLAGS.embedding_output_tfrecord,
FLAGS.embedding_model_dir, FLAGS.top_k_embedding_count, construct_pipeline(
FLAGS.bottom_k_embedding_count, FLAGS.num_shards)) p,
args.embedding_input_tfrecord,
args.embedding_output_tfrecord,
args.embedding_model_dir,
args.top_k_embedding_count,
args.bottom_k_embedding_count,
args.num_shards)
p.run()
if __name__ == '__main__': if __name__ == '__main__':
flags.mark_flags_as_required([ main()
'embedding_input_tfrecord',
'embedding_output_tfrecord',
'embedding_model_dir'
])
app.run(main)
@@ -30,13 +30,18 @@ from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_embedding_data
from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version

if six.PY2:
  import mock  # pylint: disable=g-import-not-at-top
else:
  mock = unittest.mock

try:
  import apache_beam as beam  # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
  pass
class FakeModel(model.DetectionModel):
  """A Fake Detection model with expected output nodes from post-processing."""
@@ -73,6 +78,9 @@ class FakeModel(model.DetectionModel):
  def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
    pass

  def restore_from_objects(self, fine_tune_checkpoint_type):
    pass

  def loss(self, prediction_dict, true_image_shapes):
    pass
@@ -236,13 +244,13 @@ class GenerateEmbeddingData(tf.test.TestCase):
        .int64_list.value, [5])
    self.assertAllEqual(
        example.features.feature['image/object/class/text']
        .bytes_list.value, [b'hyena'])
    self.assertAllClose(
        example.features.feature['image/class/label']
        .int64_list.value, [5])
    self.assertAllEqual(
        example.features.feature['image/class/text']
        .bytes_list.value, [b'hyena'])

    # Check other essential attributes.
    self.assertAllEqual(
@@ -251,7 +259,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
        example.features.feature['image/width'].int64_list.value, [600])
    self.assertAllEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [b'image_id'])
    self.assertTrue(
        example.features.feature['image/encoded'].bytes_list.value)
@@ -268,7 +276,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
        .int64_list.value, [5])
    self.assertAllEqual(tf.train.Example.FromString(
        generated_example).features.feature['image/object/class/text']
        .bytes_list.value, [b'hyena'])
    output = inference_fn.process(generated_example)
    output_example = output[0]
    self.assert_expected_example(output_example)
@@ -304,24 +312,26 @@ class GenerateEmbeddingData(tf.test.TestCase):
        .feature['image/object/class/label'].int64_list.value, [5])
    self.assertAllEqual(
        tf.train.Example.FromString(generated_example).features
        .feature['image/object/class/text'].bytes_list.value, [b'hyena'])
    output = inference_fn.process(generated_example)
    output_example = output[0]
    self.assert_expected_example(output_example, botk=True)
  def test_beam_pipeline(self):
    with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
      temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
      output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
      saved_model_path = self._export_saved_model()
      top_k_embedding_count = 1
      bottom_k_embedding_count = 0
      num_shards = 1
      pipeline_options = beam.options.pipeline_options.PipelineOptions(
          runner='DirectRunner')
      p = beam.Pipeline(options=pipeline_options)
      generate_embedding_data.construct_pipeline(
          p, input_tfrecord, output_tfrecord, saved_model_path,
          top_k_embedding_count, bottom_k_embedding_count, num_shards)
      p.run()
      filenames = tf.io.gfile.glob(
          output_tfrecord + '-?????-of-?????')
      actual_output = []
...
@@ -14,6 +14,9 @@
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
This tool supports data generation for object detection (boxes, masks),
keypoint detection, and DensePose.
Please note that this tool creates sharded output files.

Example usage:
@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
                       'Training annotations JSON file.')
tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
                       'Validation annotations JSON file.')
# DensePose is only available for coco 2014.
tf.flags.DEFINE_string('train_densepose_annotations_file', '',
                       'Training annotations JSON file for DensePose.')
tf.flags.DEFINE_string('val_densepose_annotations_file', '',
                       'Validation annotations JSON file for DensePose.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
# Whether to only produce images/annotations on person class (for keypoint /
# densepose task).
tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
                        'remove all annotations for non-person objects.')
tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
                        'remove all examples that do not contain a person.')

FLAGS = flags.FLAGS
@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [
    b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
]
_COCO_PART_NAMES = [
    b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
    b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
    b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
    b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
    b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
    b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
    b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
    b'left_face',
]

_DP_PART_ID_OFFSET = 1


def clip_to_unit(x):
  return min(max(x, 0.0), 1.0)
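# For example: clip_to_unit(-0.2) == 0.0, clip_to_unit(0.5) == 0.5, and
# clip_to_unit(300. / 256.) == 1.0.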
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False,
                      keypoint_annotations_dict=None,
                      densepose_annotations_dict=None,
                      remove_non_person_annotations=False,
                      remove_non_person_images=False):
"""Converts image and annotations to a tf.Example proto. """Converts image and annotations to a tf.Example proto.
Args: Args:
...@@ -108,10 +142,23 @@ def create_tf_example(image, ...@@ -108,10 +142,23 @@ def create_tf_example(image,
dictionary with keys: [u'keypoints', u'num_keypoints'] represeting the dictionary with keys: [u'keypoints', u'num_keypoints'] represeting the
keypoint information for this person object annotation. If None, then keypoint information for this person object annotation. If None, then
no keypoint annotations will be populated. no keypoint annotations will be populated.
densepose_annotations_dict: A dictionary that maps from annotation_id to a
dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V']
representing part surface coordinates. For more information see
http://densepose.org/.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
Returns: Returns:
key: SHA256 hash of the image.
example: The converted tf.Example example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored. num_annotations_skipped: Number of (invalid) annotations that were ignored.
num_keypoint_annotation_skipped: Number of keypoint annotations that were
skipped.
num_densepose_annotation_skipped: Number of DensePose annotations that were
skipped.
Raises: Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG ValueError: if the image pointed to by data['filename'] is not a valid JPEG
...@@ -146,6 +193,16 @@ def create_tf_example(image, ...@@ -146,6 +193,16 @@ def create_tf_example(image,
  num_annotations_skipped = 0
  num_keypoint_annotation_used = 0
  num_keypoint_annotation_skipped = 0
  dp_part_index = []
  dp_x = []
  dp_y = []
  dp_u = []
  dp_v = []
  dp_num_points = []
  densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
  include_densepose = densepose_annotations_dict is not None
  num_densepose_annotation_used = 0
  num_densepose_annotation_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    if width <= 0 or height <= 0:
@@ -154,14 +211,18 @@ def create_tf_example(image,
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    category_id = int(object_annotations['category_id'])
    category_name = category_index[category_id]['name'].encode('utf8')
    if remove_non_person_annotations and category_name != b'person':
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_ids.append(category_id)
    category_names.append(category_name)
    area.append(object_annotations['area'])

    if include_masks:
@@ -197,6 +258,40 @@ def create_tf_example(image,
        keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
        keypoints_name.extend(_COCO_KEYPOINT_NAMES)
        num_keypoints.append(0)
    if include_densepose:
      annotation_id = object_annotations['id']
      if (annotation_id in densepose_annotations_dict and
          all(key in densepose_annotations_dict[annotation_id]
              for key in densepose_keys)):
        dp_annotations = densepose_annotations_dict[annotation_id]
        num_densepose_annotation_used += 1
        dp_num_points.append(len(dp_annotations['dp_I']))
        dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
                              for i in dp_annotations['dp_I']])
        # DensePose surface coordinates are defined on a [256, 256] grid
        # relative to each instance box (i.e. absolute coordinates in range
        # [0., 256.]). The following converts the coordinates
        # so that they are expressed in normalized image coordinates.
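        # Worked example (toy numbers, not from a real annotation): a point
        # with dp_x = 128 inside a box at x = 100 with width = 50 in a
        # 200px-wide image gives x_box_rel = 128 / 256. = 0.5 and
        # x_norm = (100 + 0.5 * 50) / 200. = 0.625.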
        dp_x_box_rel = [
            clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
        dp_x_norm = [(float(x) + x_box_rel * width) / image_width
                     for x_box_rel in dp_x_box_rel]
        dp_y_box_rel = [
            clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
        dp_y_norm = [(float(y) + y_box_rel * height) / image_height
                     for y_box_rel in dp_y_box_rel]
        dp_x.extend(dp_x_norm)
        dp_y.extend(dp_y_norm)
        dp_u.extend(dp_annotations['dp_U'])
        dp_v.extend(dp_annotations['dp_V'])
      else:
        dp_num_points.append(0)

  if (remove_non_person_images and
      not any(name == b'person' for name in category_names)):
    return (key, None, num_annotations_skipped,
            num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
@@ -243,15 +338,34 @@ def create_tf_example(image,
          dataset_util.bytes_list_feature(keypoints_name))
    num_keypoint_annotation_skipped = (
        len(keypoint_annotations_dict) - num_keypoint_annotation_used)
  if include_densepose:
    feature_dict['image/object/densepose/num'] = (
        dataset_util.int64_list_feature(dp_num_points))
    feature_dict['image/object/densepose/part_index'] = (
        dataset_util.int64_list_feature(dp_part_index))
    feature_dict['image/object/densepose/x'] = (
        dataset_util.float_list_feature(dp_x))
    feature_dict['image/object/densepose/y'] = (
        dataset_util.float_list_feature(dp_y))
    feature_dict['image/object/densepose/u'] = (
        dataset_util.float_list_feature(dp_u))
    feature_dict['image/object/densepose/v'] = (
        dataset_util.float_list_feature(dp_v))
    num_densepose_annotation_skipped = (
        len(densepose_annotations_dict) - num_densepose_annotation_used)
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return (key, example, num_annotations_skipped,
          num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
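# Aside (illustrative; feature names as written above, parsing spec assumed):
# the DensePose features can be read back from a serialized example with, e.g.:
#
#   parsed = tf.io.parse_single_example(serialized, {
#       'image/object/densepose/num': tf.io.VarLenFeature(tf.int64),
#       'image/object/densepose/x': tf.io.VarLenFeature(tf.float32),
#   })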
def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
                                            output_path, include_masks,
                                            num_shards,
                                            keypoint_annotations_file='',
                                            densepose_annotations_file='',
                                            remove_non_person_annotations=False,
                                            remove_non_person_images=False):
"""Loads COCO annotation json files and converts to tf.Record format. """Loads COCO annotation json files and converts to tf.Record format.
Args: Args:
...@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir, ...@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_file: JSON file containing the person keypoint keypoint_annotations_file: JSON file containing the person keypoint
annotations. If empty, then no person keypoint annotations will be annotations. If empty, then no person keypoint annotations will be
generated. generated.
densepose_annotations_file: JSON file containing the DensePose annotations.
If empty, then no DensePose annotations will be generated.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
""" """
with contextlib2.ExitStack() as tf_record_close_stack, \ with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid: tf.gfile.GFile(annotations_file, 'r') as fid:
...@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir, ...@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
if image_id not in annotations_index: if image_id not in annotations_index:
missing_annotation_count += 1 missing_annotation_count += 1
annotations_index[image_id] = [] annotations_index[image_id] = []
logging.info('%d images are missing annotations.', missing_annotation_count) logging.info('%d images are missing annotations.',
missing_annotation_count)
keypoint_annotations_index = {} keypoint_annotations_index = {}
if keypoint_annotations_file: if keypoint_annotations_file:
...@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir, ...@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_index[image_id] = {} keypoint_annotations_index[image_id] = {}
keypoint_annotations_index[image_id][annotation['id']] = annotation keypoint_annotations_index[image_id][annotation['id']] = annotation
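  # Index DensePose annotations the same way as keypoints: image id ->
  # annotation id -> annotation, so the per-image lookup in the loop below is
  # a constant-time dict access.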
densepose_annotations_index = {}
if densepose_annotations_file:
with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
densepose_groundtruth_data = json.load(fid)
if 'annotations' in densepose_groundtruth_data:
for annotation in densepose_groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in densepose_annotations_index:
densepose_annotations_index[image_id] = {}
densepose_annotations_index[image_id][annotation['id']] = annotation
    total_num_annotations_skipped = 0
    total_num_keypoint_annotations_skipped = 0
    total_num_densepose_annotations_skipped = 0
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(images))
...@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
      keypoint_annotations_dict = {}
      if image['id'] in keypoint_annotations_index:
        keypoint_annotations_dict = keypoint_annotations_index[image['id']]
      densepose_annotations_dict = None
      if densepose_annotations_file:
        densepose_annotations_dict = {}
        if image['id'] in densepose_annotations_index:
          densepose_annotations_dict = densepose_annotations_index[image['id']]
      (_, tf_example, num_annotations_skipped,
       num_keypoint_annotations_skipped,
       num_densepose_annotations_skipped) = create_tf_example(
           image, annotations_list, image_dir, category_index, include_masks,
           keypoint_annotations_dict, densepose_annotations_dict,
           remove_non_person_annotations, remove_non_person_images)
      total_num_annotations_skipped += num_annotations_skipped
      total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
total_num_densepose_annotations_skipped += (
num_densepose_annotations_skipped)
      shard_idx = idx % num_shards
      if tf_example:
        output_tfrecords[shard_idx].write(tf_example.SerializeToString())
    logging.info('Finished writing, skipped %d annotations.',
                 total_num_annotations_skipped)
    if keypoint_annotations_file:
      logging.info('Finished writing, skipped %d keypoint annotations.',
                   total_num_keypoint_annotations_skipped)
if densepose_annotations_file:
logging.info('Finished writing, skipped %d DensePose annotations.',
total_num_densepose_annotations_skipped)
def main(_):
...@@ -347,20 +492,26 @@ def main(_):
      train_output_path,
      FLAGS.include_masks,
      num_shards=100,
      keypoint_annotations_file=FLAGS.train_keypoint_annotations_file,
      densepose_annotations_file=FLAGS.train_densepose_annotations_file,
      remove_non_person_annotations=FLAGS.remove_non_person_annotations,
      remove_non_person_images=FLAGS.remove_non_person_images)
  _create_tf_record_from_coco_annotations(
      FLAGS.val_annotations_file,
      FLAGS.val_image_dir,
      val_output_path,
      FLAGS.include_masks,
      num_shards=50,
      keypoint_annotations_file=FLAGS.val_keypoint_annotations_file,
      densepose_annotations_file=FLAGS.val_densepose_annotations_file,
      remove_non_person_annotations=FLAGS.remove_non_person_annotations,
      remove_non_person_images=FLAGS.remove_non_person_images)
  _create_tf_record_from_coco_annotations(
      FLAGS.testdev_annotations_file,
      FLAGS.test_image_dir,
      testdev_output_path,
      FLAGS.include_masks,
      num_shards=50)
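# A minimal sketch of an invocation with the new DensePose options enabled,
# assuming the flag names referenced above (all paths are illustrative
# placeholders, not real files):
#
#   python create_coco_tf_record.py \
#     --val_annotations_file=/path/to/instances_val.json \
#     --val_image_dir=/path/to/val_images \
#     --val_keypoint_annotations_file=/path/to/person_keypoints_val.json \
#     --val_densepose_annotations_file=/path/to/densepose_val.json \
#     --remove_non_person_images=true \
#     --include_masks=false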
if __name__ == '__main__':
...
...@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
    }
    (_, example,
     num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index)

    self.assertEqual(num_annotations_skipped, 0)
...@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
    }
    (_, example,
     num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index, include_masks=True)

    self.assertEqual(num_annotations_skipped, 0)
...@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
        }
    }
    _, example, _, num_keypoint_annotation_skipped, _ = (
        create_coco_tf_record.create_tf_example(
            image,
            annotations_list,
            image_dir,
            category_index,
            include_masks=False,
            keypoint_annotations_dict=keypoint_annotations_dict))

    self.assertEqual(num_keypoint_annotation_skipped, 0)
    self._assertProtoEqual(
...@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
        example.features.feature[
            'image/object/keypoint/visibility'].int64_list.value, vv)
def test_create_tf_example_with_dense_pose(self):
image_dir = self.get_temp_dir()
image_file_name = 'tmp_image.jpg'
image_data = np.random.randint(low=0, high=256, size=(256, 256, 3)).astype(
np.uint8)
save_path = os.path.join(image_dir, image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
image = {
'file_name': image_file_name,
'height': 256,
'width': 256,
'id': 11,
}
min_x, min_y = 64, 64
max_x, max_y = 128, 128
keypoints = []
num_visible_keypoints = 0
xv = []
yv = []
vv = []
for _ in range(17):
xc = min_x + int(np.random.rand()*(max_x - min_x))
yc = min_y + int(np.random.rand()*(max_y - min_y))
vis = np.random.randint(0, 3)
xv.append(xc)
yv.append(yc)
vv.append(vis)
keypoints.extend([xc, yc, vis])
num_visible_keypoints += (vis > 0)
annotations_list = [{
'area': 0.5,
'iscrowd': False,
'image_id': 11,
'bbox': [64, 64, 128, 128],
'category_id': 1,
'id': 1000
}]
num_points = 45
dp_i = np.random.randint(1, 25, (num_points,)).astype(np.float32)
dp_u = np.random.randn(num_points)
dp_v = np.random.randn(num_points)
dp_x = np.random.rand(num_points)*256.
dp_y = np.random.rand(num_points)*256.
densepose_annotations_dict = {
1000: {
'dp_I': dp_i,
'dp_U': dp_u,
'dp_V': dp_v,
'dp_x': dp_x,
'dp_y': dp_y,
'bbox': [64, 64, 128, 128],
}
}
category_index = {
1: {
'name': 'person',
'id': 1
}
}
_, example, _, _, num_densepose_annotation_skipped = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
densepose_annotations_dict=densepose_annotations_dict))
self.assertEqual(num_densepose_annotation_skipped, 0)
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[six.b(image_file_name)])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[six.b(str(image['id']))])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value,
[six.b('jpeg')])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
[six.b('person')])
self._assertProtoEqual(
example.features.feature['image/object/densepose/num'].int64_list.value,
[num_points])
self.assertAllEqual(
example.features.feature[
'image/object/densepose/part_index'].int64_list.value,
dp_i.astype(np.int64) - create_coco_tf_record._DP_PART_ID_OFFSET)
self.assertAllClose(
example.features.feature['image/object/densepose/u'].float_list.value,
dp_u)
self.assertAllClose(
example.features.feature['image/object/densepose/v'].float_list.value,
dp_v)
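    # Mirror the conversion in create_tf_example: box-relative coordinates in
    # [0, 256] are scaled by the bbox size (128), offset by the bbox origin
    # (64), and normalized by the image dimension (256).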
expected_dp_x = (64 + dp_x * 128. / 256.) / 256.
expected_dp_y = (64 + dp_y * 128. / 256.) / 256.
self.assertAllClose(
example.features.feature['image/object/densepose/x'].float_list.value,
expected_dp_x)
self.assertAllClose(
example.features.feature['image/object/densepose/y'].float_list.value,
expected_dp_y)
  def test_create_sharded_tf_record(self):
    tmp_dir = self.get_temp_dir()
    image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
...