Commit b1025b3b authored by syiming

Merge remote-tracking branch 'upstream/master' into fasterrcnn_fpn_keras_feature_extractor

parents 69ce1c45 e9df75ab
......@@ -17,15 +17,17 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf
from object_detection.core import freezable_batch_norm
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class FreezableBatchNormTest(tf.test.TestCase):
"""Tests for FreezableBatchNorm operations."""
......
......@@ -681,3 +681,95 @@ class HardExampleMiner(object):
num_positives, num_negatives)
class PenaltyReducedLogisticFocalLoss(Loss):
"""Penalty-reduced pixelwise logistic regression with focal loss.
The loss is defined in Equation (1) of the Objects as Points[1] paper.
Although the loss is defined per-pixel in the output space, this class
assumes that each pixel is an anchor to be compatible with the base class.
[1]: https://arxiv.org/abs/1904.07850
"""
def __init__(self, alpha=2.0, beta=4.0, sigmoid_clip_value=1e-4):
"""Constructor.
Args:
alpha: Focussing parameter of the focal loss. Increasing this will
decrease the loss contribution of the well classified examples.
beta: The local penalty reduction factor. Increasing this will decrease
the contribution of loss due to negative pixels near the keypoint.
sigmoid_clip_value: The sigmoid operation used internally will be clipped
between [sigmoid_clip_value, 1 - sigmoid_clip_value].
"""
self._alpha = alpha
self._beta = beta
self._sigmoid_clip_value = sigmoid_clip_value
super(PenaltyReducedLogisticFocalLoss, self).__init__()
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
In all input tensors, `num_anchors` is the total number of pixels in the
output space.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted unscaled logits for each class.
The function will compute sigmoid on this tensor internally.
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing a tensor with the 'splatted' keypoints,
possibly using a gaussian kernel. This function assumes that
the target is bounded between [0, 1].
weights: a float tensor of shape either [batch_size, num_anchors,
num_classes] or [batch_size, num_anchors, 1]. If the shape is
[batch_size, num_anchors, 1], all the classes are equally weighted.
Returns:
loss: a float tensor of shape [batch_size, num_anchors, num_classes]
representing the value of the loss function.
"""
is_present_tensor = tf.math.equal(target_tensor, 1.0)
prediction_tensor = tf.clip_by_value(tf.sigmoid(prediction_tensor),
self._sigmoid_clip_value,
1 - self._sigmoid_clip_value)
positive_loss = (tf.math.pow((1 - prediction_tensor), self._alpha)*
tf.math.log(prediction_tensor))
negative_loss = (tf.math.pow((1 - target_tensor), self._beta)*
tf.math.pow(prediction_tensor, self._alpha)*
tf.math.log(1 - prediction_tensor))
loss = -tf.where(is_present_tensor, positive_loss, negative_loss)
return loss * weights
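# A minimal usage sketch of the loss above (values are illustrative): one
# image, three "anchor" pixels, two classes. The internal `_compute_loss`
# method is called directly to mirror the docstring shapes.
focal_loss = PenaltyReducedLogisticFocalLoss(alpha=2.0, beta=4.0)
logits = tf.constant([[[2.0, -1.0], [0.5, 0.0], [-2.0, 3.0]]])   # [1, 3, 2]
targets = tf.constant([[[1.0, 0.0], [0.3, 0.0], [0.0, 1.0]]])    # [1, 3, 2]
example_weights = tf.constant([[[1.0], [1.0], [0.0]]])           # [1, 3, 1]
per_pixel_loss = focal_loss._compute_loss(logits, targets, example_weights)  # [1, 3, 2]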
class L1LocalizationLoss(Loss):
"""L1 loss or absolute difference.
When used in a per-pixel manner, each pixel should be given as an anchor.
"""
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors]
representing the (encoded) predicted locations of objects.
target_tensor: A float tensor of shape [batch_size, num_anchors]
representing the regression targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a float tensor of shape [batch_size, num_anchors]
representing the value of the loss function.
"""
return tf.losses.absolute_difference(
target_tensor,
prediction_tensor,
weights=weights,
loss_collection=None,
reduction=tf.losses.Reduction.NONE
)
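# A minimal usage sketch of L1LocalizationLoss (values are illustrative): the
# result is the element-wise absolute difference, weighted per anchor.
l1_loss = L1LocalizationLoss()
l1_preds = tf.constant([[0.0, 1.0, 2.0], [1.0, 1.0, 1.0]])       # [2, 3]
l1_targets = tf.constant([[0.5, 1.0, 0.0], [1.0, 2.0, 1.0]])     # [2, 3]
l1_weights = tf.ones_like(l1_preds)
per_anchor_loss = l1_loss._compute_loss(l1_preds, l1_targets, l1_weights)  # [2, 3]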
......@@ -16,10 +16,6 @@
"""Provides functions to prefetch tensors to feed into models."""
import tensorflow.compat.v1 as tf
from object_detection.utils import tf_version
if not tf_version.is_tf1():
raise ValueError('`prefetcher.py` is only supported in Tensorflow 1.X')
def prefetch(tensor_dict, capacity):
"""Creates a prefetch queue for tensors.
......
......@@ -18,16 +18,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from six.moves import range
import tensorflow.compat.v1 as tf
# pylint: disable=g-bad-import-order,
from object_detection.core import prefetcher
import tf_slim as slim
# pylint: disable=g-bad-import-order
from object_detection.core import prefetcher
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class PrefetcherTest(tf.test.TestCase):
"""Test class for prefetcher."""
......
......@@ -19,6 +19,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from absl.testing import parameterized
import numpy as np
import six
......@@ -30,11 +31,12 @@ from object_detection.core import preprocessor
from object_detection.core import preprocessor_cache
from object_detection.core import standard_fields as fields
from object_detection.utils import test_case
from object_detection.utils import tf_version
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
from unittest import mock # pylint: disable=g-import-not-at-top
mock = unittest.mock # pylint: disable=g-import-not-at-top
class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
......@@ -2819,6 +2821,7 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
self.assertAllEqual(images_shape, patched_images_shape)
self.assertAllEqual(images, patched_images)
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
def testAutoAugmentImage(self):
def graph_fn():
preprocessing_options = []
......
......@@ -50,10 +50,12 @@ from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import standard_fields as fields
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
from object_detection.utils import shape_utils
from object_detection.utils import target_assigner_utils as ta_utils
from object_detection.utils import tf_version
if tf_version.is_tf1():
from object_detection.matchers import bipartite_matcher # pylint: disable=g-import-not-at-top
ResizeMethod = tf2.image.ResizeMethod
......@@ -398,6 +400,8 @@ def create_target_assigner(reference, stage=None,
ValueError: if combination reference+stage is invalid.
"""
if reference == 'Multibox' and stage == 'proposal':
if tf_version.is_tf2():
raise ValueError('GreedyBipartiteMatcher is not supported in TF 2.X.')
similarity_calc = sim_calc.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder_instance = mean_stddev_box_coder.MeanStddevBoxCoder()
......@@ -713,3 +717,943 @@ def batch_assign_confidences(target_assigner,
batch_reg_weights, batch_match)
def _smallest_positive_root(a, b, c):
"""Returns the smallest positive root of a quadratic equation."""
discriminant = tf.sqrt(b ** 2 - 4 * a * c)
# TODO(vighneshb) We are currently using the slightly incorrect
# CenterNet implementation. The commented lines implement the fixed version
# in https://github.com/princeton-vl/CornerNet. Change the implementation
# after verifying it has no negative impact.
# root1 = (-b - discriminant) / (2 * a)
# root2 = (-b + discriminant) / (2 * a)
# return tf.where(tf.less(root1, 0), root2, root1)
return (-b + discriminant) / (2.0)
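# Worked example of the formula above: for a = 2, b = 15, c = -25 the
# discriminant is sqrt(225 + 200) ~= 20.62, so the function returns
# (-15 + 20.62) / 2 ~= 2.81. Note that the true smallest positive root would
# divide by 2a = 4 (giving ~1.40); the current code intentionally reproduces
# the original CenterNet behaviour instead, as the TODO above explains.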
def max_distance_for_overlap(height, width, min_iou):
"""Computes how far apart bbox corners can lie while maintaining the iou.
Given a bounding box size, this function returns a lower bound on how far
apart the corners of another box can lie while still maintaining the given
IoU. The implementation is based on the `gaussian_radius` function in the
Objects as Points github repo: https://github.com/xingyizhou/CenterNet
Args:
height: A 1-D float Tensor representing height of the ground truth boxes.
width: A 1-D float Tensor representing width of the ground truth boxes.
min_iou: A float representing the minimum IoU desired.
Returns:
distance: A 1-D Tensor of distances, of the same length as the input
height and width tensors.
"""
# Given that the detected box is displaced at a distance `d`, the exact
# IoU value will depend on the angle at which each corner is displaced.
# We simplify our computation by assuming that each corner is displaced by
# a distance `d` in both x and y direction. This gives us a lower IoU than
# what is actually realizable and ensures that any box with corners less
# than `d` distance apart will always have an IoU greater than or equal
# to `min_iou`
# The following 3 cases can be worked on geometrically and come down to
# solving a quadratic inequality. In each case, to ensure `min_iou` we use
# the smallest positive root of the equation.
# Case where detected box is offset from ground truth and no box completely
# contains the other.
distance_detection_offset = _smallest_positive_root(
a=1, b=-(height + width),
c=width * height * ((1 - min_iou) / (1 + min_iou))
)
# Case where detection is smaller than ground truth and completely contained
# in it.
distance_detection_in_gt = _smallest_positive_root(
a=4, b=-2 * (height + width),
c=(1 - min_iou) * width * height
)
# Case where ground truth is smaller than detection and completely contained
# in it.
distance_gt_in_detection = _smallest_positive_root(
a=4 * min_iou, b=(2 * min_iou) * (width + height),
c=(min_iou - 1) * width * height
)
return tf.reduce_min([distance_detection_offset,
distance_gt_in_detection,
distance_detection_in_gt], axis=0)
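# Usage sketch (the value matches the one checked in the target assigner
# tests): a 10 x 5 box with min_iou = 0.5 yields a radius of roughly 2.8078,
# which comes from the "ground truth contained in detection" case above.
radius = max_distance_for_overlap(
    height=tf.constant([10.0]), width=tf.constant([5.0]), min_iou=0.5)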
def get_batch_predictions_from_indices(batch_predictions, indices):
"""Gets the values of predictions in a batch at the given indices.
The indices are expected to come from the offset targets generation functions
in this library. The returned value is intended to be used inside a loss
function.
Args:
batch_predictions: A tensor of shape [batch_size, height, width, 2] for
single class offsets and [batch_size, height, width, class, 2] for
multiple classes offsets (e.g. keypoint joint offsets) representing the
(height, width) or (y_offset, x_offset) predictions over a batch.
indices: A tensor of shape [num_instances, 3] for single class offset and
[num_instances, 4] for multiple classes offsets representing the indices
in the batch to be penalized in a loss function
Returns:
values: A tensor of shape [num_instances, 2] holding the predicted values
at the given indices.
"""
return tf.gather_nd(batch_predictions, indices)
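# A minimal sketch of how the indices select predictions (shapes are
# illustrative): each row of `indices` is (batch_index, y, x), and
# tf.gather_nd pulls out the 2-vector prediction at that output cell.
prediction_map = tf.zeros([2, 40, 20, 2])                       # [batch, h, w, 2]
example_indices = tf.constant([[0, 10, 10], [1, 7, 11]])        # [num_instances, 3]
picked = get_batch_predictions_from_indices(prediction_map, example_indices)  # [2, 2]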
def _compute_std_dev_from_box_size(boxes_height, boxes_width, min_overlap):
"""Computes the standard deviation of the Gaussian kernel from box size.
Args:
boxes_height: A 1D tensor with shape [num_instances] representing the height
of each box.
boxes_width: A 1D tensor with shape [num_instances] representing the width
of each box.
min_overlap: The minimum IOU overlap that boxes need to have to not be
penalized.
Returns:
A 1D tensor with shape [num_instances] representing the computed Gaussian
sigma for each box.
"""
# We are dividing by 3 so that points closer than the computed
# distance have a >99% CDF.
sigma = max_distance_for_overlap(boxes_height, boxes_width, min_overlap)
sigma = (2 * tf.math.maximum(tf.math.floor(sigma), 0.0) + 1) / 6.0
return sigma
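# Worked example (assuming the max_distance_for_overlap behaviour above): for
# a 10 x 5 box with min_overlap = 0.7 the distance comes out to roughly 1.84,
# so sigma = (2 * floor(1.84) + 1) / 6 = 3 / 6 = 0.5.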
class CenterNetCenterHeatmapTargetAssigner(object):
"""Wrapper to compute the object center heatmap."""
def __init__(self, stride, min_overlap=0.7):
"""Initializes the target assigner.
Args:
stride: int, the stride of the network in output pixels.
min_overlap: The minimum IOU overlap that boxes need to have to not be
penalized.
"""
self._stride = stride
self._min_overlap = min_overlap
def assign_center_targets_from_boxes(self,
height,
width,
gt_boxes_list,
gt_classes_list,
gt_weights_list=None):
"""Computes the object center heatmap target.
Args:
height: int, height of input to the model. This is used to
determine the height of the output.
width: int, width of the input to the model. This is used to
determine the width of the output.
gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
representing the groundtruth detection bounding boxes for each sample in
the batch. The box coordinates are expected in normalized coordinates.
gt_classes_list: A list of float tensors with shape [num_boxes,
num_classes] representing the one-hot encoded class labels for each box
in the gt_boxes_list.
gt_weights_list: A list of float tensors with shape [num_boxes]
representing the weight of each groundtruth detection box.
Returns:
heatmap: A Tensor of size [batch_size, output_height, output_width,
num_classes] representing the per class center heatmap. output_height
and output_width are computed by dividing the input height and width by
the stride specified during initialization.
"""
out_height = tf.cast(height // self._stride, tf.float32)
out_width = tf.cast(width // self._stride, tf.float32)
# Compute the yx-grid to be used to generate the heatmap. Each returned
# tensor has shape of [out_height, out_width]
(y_grid, x_grid) = ta_utils.image_shape_to_grids(out_height, out_width)
heatmaps = []
if gt_weights_list is None:
gt_weights_list = [None] * len(gt_boxes_list)
# TODO(vighneshb) Replace the for loop with a batch version.
for boxes, class_targets, weights in zip(gt_boxes_list, gt_classes_list,
gt_weights_list):
boxes = box_list.BoxList(boxes)
# Convert the box coordinates to absolute output image dimension space.
boxes = box_list_ops.to_absolute_coordinates(boxes,
height // self._stride,
width // self._stride)
# Get the box center coordinates. Each returned tensor has the shape of
# [num_instances]
(y_center, x_center, boxes_height,
boxes_width) = boxes.get_center_coordinates_and_sizes()
# Compute the sigma from box size. The tensor shape: [num_instances].
sigma = _compute_std_dev_from_box_size(boxes_height, boxes_width,
self._min_overlap)
# Apply the Gaussian kernel to the center coordinates. Returned heatmap
# has shape of [out_height, out_width, num_classes]
heatmap = ta_utils.coordinates_to_heatmap(
y_grid=y_grid,
x_grid=x_grid,
y_coordinates=y_center,
x_coordinates=x_center,
sigma=sigma,
channel_onehot=class_targets,
channel_weights=weights)
heatmaps.append(heatmap)
# Return the stacked heatmaps over the batch.
return tf.stack(heatmaps, axis=0)
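# Usage sketch (mirrors the unit tests for this assigner): with stride 4 and
# an 80 x 80 input, the heatmap is 20 x 20 and a box covering the whole image
# peaks at the output-space center (10, 10).
center_assigner = CenterNetCenterHeatmapTargetAssigner(stride=4)
example_boxes = [tf.constant([[0.0, 0.0, 1.0, 1.0]])]
example_classes = [tf.one_hot([0], depth=4)]
center_heatmap = center_assigner.assign_center_targets_from_boxes(
    80, 80, example_boxes, example_classes)                     # [1, 20, 20, 4]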
class CenterNetBoxTargetAssigner(object):
"""Wrapper to compute target tensors for the object detection task.
This class has methods that take as input a batch of ground truth tensors
(in the form of a list) and return the targets required to train the object
detection task.
"""
def __init__(self, stride):
"""Initializes the target assigner.
Args:
stride: int, the stride of the network in output pixels.
"""
self._stride = stride
def assign_size_and_offset_targets(self,
height,
width,
gt_boxes_list,
gt_weights_list=None):
"""Returns the box height/width and center offset targets and their indices.
The returned values are expected to be used with predicted tensors
of size (batch_size, height//self._stride, width//self._stride, 2). The
predicted values at the relevant indices can be retrieved with the
get_batch_predictions_from_indices function.
Args:
height: int, height of input to the model. This is used to determine the
height of the output.
width: int, width of the input to the model. This is used to determine the
width of the output.
gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
representing the groundtruth detection bounding boxes for each sample in
the batch. The coordinates are expected in normalized coordinates.
gt_weights_list: A list of tensors with shape [num_boxes] corresponding to
the weight of each groundtruth detection box.
Returns:
batch_indices: an integer tensor of shape [num_boxes, 3] holding the
indices inside the predicted tensor which should be penalized. The
first column indicates the index along the batch dimension and the
second and third columns indicate the index along the y and x
dimensions respectively.
batch_box_height_width: a float tensor of shape [num_boxes, 2] holding
expected height and width of each box in the output space.
batch_offsets: a float tensor of shape [num_boxes, 2] holding the
expected y and x offset of each box in the output space.
batch_weights: a float tensor of shape [num_boxes] indicating the
weight of each prediction.
"""
if gt_weights_list is None:
gt_weights_list = [None] * len(gt_boxes_list)
batch_indices = []
batch_box_height_width = []
batch_weights = []
batch_offsets = []
for i, (boxes, weights) in enumerate(zip(gt_boxes_list, gt_weights_list)):
boxes = box_list.BoxList(boxes)
boxes = box_list_ops.to_absolute_coordinates(boxes,
height // self._stride,
width // self._stride)
# Get the box center coordinates. Each returned tensor has the shape of
# [num_boxes]
(y_center, x_center, boxes_height,
boxes_width) = boxes.get_center_coordinates_and_sizes()
num_boxes = tf.shape(x_center)
# Compute the offsets and indices of the box centers. Shape:
# offsets: [num_boxes, 2]
# indices: [num_boxes, 2]
(offsets, indices) = ta_utils.compute_floor_offsets_with_indices(
y_source=y_center, x_source=x_center)
# Assign ones if weights are not provided.
if weights is None:
weights = tf.ones(num_boxes, dtype=tf.float32)
# Shape of [num_boxes, 1] integer tensor filled with current batch index.
batch_index = i * tf.ones_like(indices[:, 0:1], dtype=tf.int32)
batch_indices.append(tf.concat([batch_index, indices], axis=1))
batch_box_height_width.append(
tf.stack([boxes_height, boxes_width], axis=1))
batch_weights.append(weights)
batch_offsets.append(offsets)
batch_indices = tf.concat(batch_indices, axis=0)
batch_box_height_width = tf.concat(batch_box_height_width, axis=0)
batch_weights = tf.concat(batch_weights, axis=0)
batch_offsets = tf.concat(batch_offsets, axis=0)
return (batch_indices, batch_box_height_width, batch_offsets, batch_weights)
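# Usage sketch (mirrors the unit tests for this assigner): one image with a
# single box covering the whole 80 x 80 input and stride 4.
box_assigner = CenterNetBoxTargetAssigner(stride=4)
example_gt_boxes = [tf.constant([[0.0, 0.0, 1.0, 1.0]])]
(box_indices, box_hw, box_offsets, box_weights) = (
    box_assigner.assign_size_and_offset_targets(80, 80, example_gt_boxes))
# box_indices -> [[0, 10, 10]], box_hw -> [[20., 20.]], box_offsets -> [[0., 0.]]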
# TODO(yuhuic): Update this class to handle the instance/keypoint weights.
# Currently those weights are used as "mask" to indicate whether an
# instance/keypoint should be considered or not (expecting only either 0 or 1
# value). In reality, the weights can be any value and this class should handle
# those values properly.
class CenterNetKeypointTargetAssigner(object):
"""Wrapper to compute target tensors for the CenterNet keypoint estimation.
This class has methods that take as input a batch of groundtruth tensors
(in the form of a list) and returns the targets required to train the
CenterNet model for keypoint estimation. Specifically, the class methods
expect the groundtruth in the following formats (consistent with the
standard Object Detection API). Note that usually the groundtruth tensors are
packed in a list, which represents the batch dimension:
gt_classes_list: [Required] a list of 2D tf.float32 one-hot
(or k-hot) tensors of shape [num_instances, num_classes] containing the
class targets with the 0th index assumed to map to the first non-background
class.
gt_keypoints_list: [Required] a list of 3D tf.float32 tensors of
shape [num_instances, num_total_keypoints, 2] containing keypoint
coordinates. Note that the "num_total_keypoints" should be the sum of the
num_keypoints over all possible keypoint types, e.g. human pose, face.
For example, if a dataset contains both 17 human pose keypoints and 5 face
keypoints, then num_total_keypoints = 17 + 5 = 22.
If an instance contains only a subset of keypoints (e.g. human pose keypoints
but not face keypoints), the face keypoints will be filled with zeros.
Also note that keypoints are assumed to be provided in normalized
coordinates and missing keypoints should be encoded as NaN.
gt_keypoints_weights_list: [Optional] a list of 2D tf.float32 tensors of shape
[num_instances, num_total_keypoints] representing the weights of each
keypoint. If not provided, then all non-NaN keypoints will be equally
weighted.
gt_boxes_list: [Optional] a list of 2D tf.float32 tensors of shape
[num_instances, 4] containing coordinates of the groundtruth boxes.
Groundtruth boxes are provided in [y_min, x_min, y_max, x_max] format and
assumed to be normalized and clipped relative to the image window with
y_min <= y_max and x_min <= x_max.
Note that the boxes are only used to compute the center targets but are not
considered a required output of the keypoint task. If the boxes are not
provided, the center targets will be inferred from the keypoints
[not implemented yet].
gt_weights_list: [Optional] A list of 1D tf.float32 tensors of shape
[num_instances] containing weights for groundtruth boxes. Only useful when
gt_boxes_list is also provided.
"""
def __init__(self,
stride,
class_id,
keypoint_indices,
keypoint_std_dev=None,
per_keypoint_offset=False,
peak_radius=0):
"""Initializes a CenterNet keypoints target assigner.
Args:
stride: int, the stride of the network in output pixels.
class_id: int, the ID of the class (0-indexed) that contains the target
keypoints to consider in this task. For example, if the task is human
pose estimation, the class id should correspond to the "human" class.
keypoint_indices: A list of integers representing the indices of the
keypoints to be considered in this task. This is used to retrieve the
subset of the keypoints from gt_keypoints that should be considered in
this task.
keypoint_std_dev: A list of floats representing the standard deviation of the
Gaussian kernel used to generate the keypoint heatmap (in the unit of
output pixels). It is to provide the flexibility of using different
sizes of Gaussian kernel for each keypoint type. If not provided, then
all standard deviations will be the same as the default value (10.0 in
the output pixel space). If provided, the length of keypoint_std_dev
needs to be the same as the length of keypoint_indices, indicating the
standard deviation of each keypoint type.
per_keypoint_offset: boolean, indicating whether to assign offset for
each keypoint channel. If set False, the output offset target will have
the shape [batch_size, out_height, out_width, 2]. If set True, the
output offset target will have the shape [batch_size, out_height,
out_width, 2 * num_keypoints].
peak_radius: int, the radius (in the unit of output pixel) around heatmap
peak to assign the offset targets.
"""
self._stride = stride
self._class_id = class_id
self._keypoint_indices = keypoint_indices
self._per_keypoint_offset = per_keypoint_offset
self._peak_radius = peak_radius
if keypoint_std_dev is None:
self._keypoint_std_dev = ([_DEFAULT_KEYPOINT_OFFSET_STD_DEV] *
len(keypoint_indices))
else:
assert len(keypoint_indices) == len(keypoint_std_dev)
self._keypoint_std_dev = keypoint_std_dev
def _preprocess_keypoints_and_weights(self, out_height, out_width, keypoints,
class_onehot, class_weights,
keypoint_weights):
"""Preprocesses the keypoints and the corresponding keypoint weights.
This function performs several common steps to preprocess the keypoints and
keypoint weights features, including:
1) Select the subset of keypoints based on the keypoint indices, fill the
keypoint NaN values with zeros and convert to absolute coordinates.
2) Generate the weights of the keypoint using the following information:
a. The class of the instance.
b. The NaN value of the keypoint coordinates.
c. The provided keypoint weights.
Args:
out_height: An integer or an integer tensor indicating the output height
of the model.
out_width: An integer or an integer tensor indicating the output width of
the model.
keypoints: A float tensor of shape [num_instances, num_total_keypoints, 2]
representing the original keypoint groundtruth coordinates.
class_onehot: A float tensor of shape [num_instances, num_classes]
containing the class targets with the 0th index assumed to map to the
first non-background class.
class_weights: A float tensor of shape [num_instances] containing weights
for groundtruth instances.
keypoint_weights: A float tensor of shape
[num_instances, num_total_keypoints] representing the weights of each
keypoint.
Returns:
A tuple of two tensors:
keypoint_absolute: A float tensor of shape
[num_instances, num_keypoints, 2] which is the selected and updated
keypoint coordinates.
keypoint_weights: A float tensor of shape [num_instances, num_keypoints]
representing the updated weight of each keypoint.
"""
# Select the target keypoints by their type ids and generate the mask
# of valid elements.
valid_mask, keypoints = ta_utils.get_valid_keypoint_mask_for_class(
keypoint_coordinates=keypoints,
class_id=self._class_id,
class_onehot=class_onehot,
class_weights=class_weights,
keypoint_indices=self._keypoint_indices)
# Keypoint coordinates in absolute coordinate system.
# The shape of the tensors: [num_instances, num_keypoints, 2].
keypoints_absolute = keypoint_ops.to_absolute_coordinates(
keypoints, out_height, out_width)
# Assign default weights for the keypoints.
if keypoint_weights is None:
keypoint_weights = tf.ones_like(keypoints[:, :, 0])
else:
keypoint_weights = tf.gather(
keypoint_weights, indices=self._keypoint_indices, axis=1)
keypoint_weights = keypoint_weights * valid_mask
return keypoints_absolute, keypoint_weights
def assign_keypoint_heatmap_targets(self,
height,
width,
gt_keypoints_list,
gt_classes_list,
gt_keypoints_weights_list=None,
gt_weights_list=None,
gt_boxes_list=None):
"""Returns the keypoint heatmap targets for the CenterNet model.
Args:
height: int, height of input to the CenterNet model. This is used to
determine the height of the output.
width: int, width of the input to the CenterNet model. This is used to
determine the width of the output.
gt_keypoints_list: A list of float tensors with shape [num_instances,
num_total_keypoints, 2]. See class-level description for more detail.
gt_classes_list: A list of float tensors with shape [num_instances,
num_classes]. See class-level description for more detail.
gt_keypoints_weights_list: A list of tensors with shape [num_instances,
num_total_keypoints] corresponding to the weight of each keypoint.
gt_weights_list: A list of float tensors with shape [num_instances]. See
class-level description for more detail.
gt_boxes_list: A list of float tensors with shape [num_instances, 4]. See
class-level description for more detail. If provided, the keypoint
standard deviations will be scaled based on the box sizes.
Returns:
heatmap: A float tensor of shape [batch_size, output_height, output_width,
num_keypoints] representing the per keypoint type center heatmap.
output_height and output_width are computed by dividing the input height
and width by the stride specified during initialization. Note that the
"num_keypoints" is defined by the length of keypoint_indices, which is
not necessarily equal to "num_total_keypoints".
num_instances_batch: A 2D int tensor of shape
[batch_size, num_keypoints] representing number of instances for each
keypoint type.
valid_mask: A float tensor with shape [batch_size, output_height,
output_width] where all values within the regions of the blackout boxes
are 0.0, and 1.0 elsewhere.
"""
out_width = tf.cast(width // self._stride, tf.float32)
out_height = tf.cast(height // self._stride, tf.float32)
# Compute the yx-grid to be used to generate the heatmap. Each returned
# tensor has shape of [out_height, out_width]
y_grid, x_grid = ta_utils.image_shape_to_grids(out_height, out_width)
if gt_keypoints_weights_list is None:
gt_keypoints_weights_list = [None] * len(gt_keypoints_list)
if gt_weights_list is None:
gt_weights_list = [None] * len(gt_classes_list)
if gt_boxes_list is None:
gt_boxes_list = [None] * len(gt_keypoints_list)
heatmaps = []
num_instances_list = []
valid_mask_list = []
for keypoints, classes, kp_weights, weights, boxes in zip(
gt_keypoints_list, gt_classes_list, gt_keypoints_weights_list,
gt_weights_list, gt_boxes_list):
keypoints_absolute, kp_weights = self._preprocess_keypoints_and_weights(
out_height=out_height,
out_width=out_width,
keypoints=keypoints,
class_onehot=classes,
class_weights=weights,
keypoint_weights=kp_weights)
num_instances, num_keypoints, _ = (
shape_utils.combined_static_and_dynamic_shape(keypoints_absolute))
# A tensor of shape [num_instances, num_keypoints] with
# each element representing the type dimension for each corresponding
# keypoint:
# [[0, 1, ..., k-1],
# [0, 1, ..., k-1],
# :
# [0, 1, ..., k-1]]
keypoint_types = tf.tile(
input=tf.expand_dims(tf.range(num_keypoints), axis=0),
multiples=[num_instances, 1])
# A tensor of shape [num_instances, num_keypoints] with
# each element representing the sigma of the Gaussian kernel for each
# keypoint.
keypoint_std_dev = tf.tile(
input=tf.expand_dims(tf.constant(self._keypoint_std_dev), axis=0),
multiples=[num_instances, 1])
# If boxes is not None, then scale the standard deviation based on the
# size of the object bounding boxes similar to object center heatmap.
if boxes is not None:
boxes = box_list.BoxList(boxes)
# Convert the box coordinates to absolute output image dimension space.
boxes = box_list_ops.to_absolute_coordinates(boxes,
height // self._stride,
width // self._stride)
# Get the box height and width. Each returned tensor has the shape
# of [num_instances]
(_, _, boxes_height,
boxes_width) = boxes.get_center_coordinates_and_sizes()
# Compute the sigma from box size. The tensor shape: [num_instances].
sigma = _compute_std_dev_from_box_size(boxes_height, boxes_width, 0.7)
keypoint_std_dev = keypoint_std_dev * tf.stack(
[sigma] * num_keypoints, axis=1)
# Generate the valid region mask to ignore regions with target class but
# no corresponding keypoints.
# Shape: [num_instances].
blackout = tf.logical_and(classes[:, self._class_id] > 0,
tf.reduce_max(kp_weights, axis=1) < 1e-3)
valid_mask = ta_utils.blackout_pixel_weights_by_box_regions(
out_height, out_width, boxes.get(), blackout)
valid_mask_list.append(valid_mask)
# Apply the Gaussian kernel to the keypoint coordinates. Returned heatmap
# has shape of [out_height, out_width, num_keypoints].
heatmap = ta_utils.coordinates_to_heatmap(
y_grid=y_grid,
x_grid=x_grid,
y_coordinates=tf.keras.backend.flatten(keypoints_absolute[:, :, 0]),
x_coordinates=tf.keras.backend.flatten(keypoints_absolute[:, :, 1]),
sigma=tf.keras.backend.flatten(keypoint_std_dev),
channel_onehot=tf.one_hot(
tf.keras.backend.flatten(keypoint_types), depth=num_keypoints),
channel_weights=tf.keras.backend.flatten(kp_weights))
num_instances_list.append(
tf.cast(tf.reduce_sum(kp_weights, axis=0), dtype=tf.int32))
heatmaps.append(heatmap)
return (tf.stack(heatmaps, axis=0), tf.stack(num_instances_list, axis=0),
tf.stack(valid_mask_list, axis=0))
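# Usage sketch (mirrors the keypoint target assigner tests): one instance of
# class 1 with three keypoint types, of which only types 0 and 2 are kept.
kp_assigner = CenterNetKeypointTargetAssigner(
    stride=4, class_id=1, keypoint_indices=[0, 2])
example_kps = [tf.constant([[[0.5, 0.5], [0.4, 0.1], [0.6, 0.9]]])]  # [1, 3, 2]
example_kp_classes = [tf.one_hot([1], depth=2)]
example_kp_boxes = [tf.constant([[0.0, 0.0, 1.0, 1.0]])]
kp_heatmap, kp_num_instances, kp_valid_mask = (
    kp_assigner.assign_keypoint_heatmap_targets(
        120, 80, example_kps, example_kp_classes,
        gt_boxes_list=example_kp_boxes))
# kp_heatmap shape: [1, 30, 20, 2]; kp_valid_mask shape: [1, 30, 20].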
def _get_keypoint_types(self, num_instances, num_keypoints, num_neighbors):
"""Gets keypoint type index tensor.
The function prepares the tensor of keypoint indices with shape
[num_instances, num_keypoints, num_neighbors]. Each element represents the
keypoint type index for each corresponding keypoint and tiled along the 3rd
axis:
[[0, 1, ..., num_keypoints - 1],
[0, 1, ..., num_keypoints - 1],
:
[0, 1, ..., num_keypoints - 1]]
Args:
num_instances: int, the number of instances, used to define the 1st
dimension.
num_keypoints: int, the number of keypoint types, used to define the 2nd
dimension.
num_neighbors: int, the number of neighborhood pixels to consider for each
keypoint, used to define the 3rd dimension.
Returns:
A integer tensor of shape [num_instances, num_keypoints, num_neighbors].
"""
keypoint_types = tf.range(num_keypoints)[tf.newaxis, :, tf.newaxis]
tiled_keypoint_types = tf.tile(keypoint_types,
multiples=[num_instances, 1, num_neighbors])
return tiled_keypoint_types
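# For example, _get_keypoint_types(num_instances=2, num_keypoints=3,
# num_neighbors=1) yields a [2, 3, 1] tensor:
# [[[0], [1], [2]],
#  [[0], [1], [2]]]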
def assign_keypoints_offset_targets(self,
height,
width,
gt_keypoints_list,
gt_classes_list,
gt_keypoints_weights_list=None,
gt_weights_list=None):
"""Returns the offsets and indices of the keypoints for location refinement.
The returned values are used to refine the location of each keypoint in the
heatmap. The predicted values at the relevant indices can be retrieved with
the get_batch_predictions_from_indices function.
Args:
height: int, height of input to the CenterNet model. This is used to
determine the height of the output.
width: int, width of the input to the CenterNet model. This is used to
determine the width of the output.
gt_keypoints_list: A list of tensors with shape [num_instances,
num_total_keypoints, 2]. See class-level description for more detail.
gt_classes_list: A list of tensors with shape [num_instances,
num_classes]. See class-level description for more detail.
gt_keypoints_weights_list: A list of tensors with shape [num_instances,
num_total_keypoints] corresponding to the weight of each keypoint.
gt_weights_list: A list of float tensors with shape [num_instances]. See
class-level description for more detail.
Returns:
batch_indices: an integer tensor of shape [num_total_instances, 3] (or
[num_total_instances, 4] if 'per_keypoint_offset' is set True) holding
the indices inside the predicted tensor which should be penalized. The
first column indicates the index along the batch dimension and the
second and third columns indicate the index along the y and x
dimensions respectively. The fourth column corresponds to the channel
dimension (if 'per_keypoint_offset' is set True).
batch_offsets: a float tensor of shape [num_total_instances, 2] holding
the expected y and x offset of each box in the output space.
batch_weights: a float tensor of shape [num_total_instances] indicating
the weight of each prediction.
Note that num_total_instances = batch_size * num_instances *
num_keypoints * num_neighbors
"""
batch_indices = []
batch_offsets = []
batch_weights = []
if gt_keypoints_weights_list is None:
gt_keypoints_weights_list = [None] * len(gt_keypoints_list)
if gt_weights_list is None:
gt_weights_list = [None] * len(gt_classes_list)
for i, (keypoints, classes, kp_weights, weights) in enumerate(
zip(gt_keypoints_list, gt_classes_list, gt_keypoints_weights_list,
gt_weights_list)):
keypoints_absolute, kp_weights = self._preprocess_keypoints_and_weights(
out_height=height // self._stride,
out_width=width // self._stride,
keypoints=keypoints,
class_onehot=classes,
class_weights=weights,
keypoint_weights=kp_weights)
num_instances, num_keypoints, _ = (
shape_utils.combined_static_and_dynamic_shape(keypoints_absolute))
# [num_instances * num_keypoints]
y_source = tf.keras.backend.flatten(keypoints_absolute[:, :, 0])
x_source = tf.keras.backend.flatten(keypoints_absolute[:, :, 1])
# All keypoint coordinates and their neighbors:
# [num_instance * num_keypoints, num_neighbors]
(y_source_neighbors, x_source_neighbors,
valid_sources) = ta_utils.get_surrounding_grids(height // self._stride,
width // self._stride,
y_source, x_source,
self._peak_radius)
_, num_neighbors = shape_utils.combined_static_and_dynamic_shape(
y_source_neighbors)
# Update the valid keypoint weights.
# [num_instance * num_keypoints, num_neighbors]
valid_keypoints = tf.cast(
valid_sources, dtype=tf.float32) * tf.stack(
[tf.keras.backend.flatten(kp_weights)] * num_neighbors, axis=-1)
# Compute the offsets and indices of the box centers. Shape:
# offsets: [num_instances * num_keypoints, num_neighbors, 2]
# indices: [num_instances * num_keypoints, num_neighbors, 2]
offsets, indices = ta_utils.compute_floor_offsets_with_indices(
y_source=y_source_neighbors,
x_source=x_source_neighbors,
y_target=y_source,
x_target=x_source)
# Reshape to:
# offsets: [num_instances * num_keypoints * num_neighbors, 2]
# indices: [num_instances * num_keypoints * num_neighbors, 2]
offsets = tf.reshape(offsets, [-1, 2])
indices = tf.reshape(indices, [-1, 2])
# Prepare the batch indices to be prepended.
batch_index = tf.fill(
[num_instances * num_keypoints * num_neighbors, 1], i)
if self._per_keypoint_offset:
tiled_keypoint_types = self._get_keypoint_types(
num_instances, num_keypoints, num_neighbors)
batch_indices.append(
tf.concat([batch_index, indices,
tf.reshape(tiled_keypoint_types, [-1, 1])], axis=1))
else:
batch_indices.append(tf.concat([batch_index, indices], axis=1))
batch_offsets.append(offsets)
batch_weights.append(tf.keras.backend.flatten(valid_keypoints))
# Concatenate the tensors in the batch in the first dimension:
# shape: [batch_size * num_instances * num_keypoints * num_neighbors, 3] or
# [batch_size * num_instances * num_keypoints * num_neighbors, 4] if
# 'per_keypoint_offset' is set to True.
batch_indices = tf.concat(batch_indices, axis=0)
# shape: [batch_size * num_instances * num_keypoints * num_neighbors]
batch_weights = tf.concat(batch_weights, axis=0)
# shape: [batch_size * num_instances * num_keypoints * num_neighbors, 2]
batch_offsets = tf.concat(batch_offsets, axis=0)
return (batch_indices, batch_offsets, batch_weights)
def assign_joint_regression_targets(self,
height,
width,
gt_keypoints_list,
gt_classes_list,
gt_boxes_list=None,
gt_keypoints_weights_list=None,
gt_weights_list=None):
"""Returns the joint regression from center grid to keypoints.
The joint regression is used as the grouping cue from the estimated
keypoints to instance center. The offsets are the vectors from the floored
object center coordinates to the keypoint coordinates.
Args:
height: int, height of input to the CenterNet model. This is used to
determine the height of the output.
width: int, width of the input to the CenterNet model. This is used to
determine the width of the output.
gt_keypoints_list: A list of float tensors with shape [num_instances,
num_total_keypoints, 2]. See class-level description for more detail.
gt_classes_list: A list of float tensors with shape [num_instances,
num_classes]. See class-level description for more detail.
gt_boxes_list: A list of float tensors with shape [num_instances, 4]. See
class-level description for more detail. If provided, then the center
targets will be computed based on the center of the boxes.
gt_keypoints_weights_list: A list of float tensors with shape
[num_instances, num_total_keypoints] representing the weight of each
keypoint.
gt_weights_list: A list of float tensors with shape [num_instances]. See
class-level description for more detail.
Returns:
batch_indices: an integer tensor of shape [num_total_instances, 4] holding the
indices inside the predicted tensor which should be penalized. The
first column indicates the index along the batch dimension and the
second and third columns indicate the index along the y and x
dimensions respectively, the last dimension refers to the keypoint type
dimension.
batch_offsets: a float tensor of shape [num_total_instances, 2] holding the
expected y and x offset of each box in the output space.
batch_weights: a float tensor of shape [num_total_instances] indicating the
weight of each prediction.
Note that num_total_instances = batch_size * num_instances * num_keypoints
Raises:
NotImplementedError: currently the object center coordinates need to be
computed from groundtruth bounding boxes. The functionality of
generating the object center coordinates from keypoints is not
implemented yet.
"""
batch_indices = []
batch_offsets = []
batch_weights = []
batch_size = len(gt_keypoints_list)
if gt_keypoints_weights_list is None:
gt_keypoints_weights_list = [None] * batch_size
if gt_boxes_list is None:
gt_boxes_list = [None] * batch_size
if gt_weights_list is None:
gt_weights_list = [None] * len(gt_classes_list)
for i, (keypoints, classes, boxes, kp_weights, weights) in enumerate(
zip(gt_keypoints_list, gt_classes_list,
gt_boxes_list, gt_keypoints_weights_list, gt_weights_list)):
keypoints_absolute, kp_weights = self._preprocess_keypoints_and_weights(
out_height=height // self._stride,
out_width=width // self._stride,
keypoints=keypoints,
class_onehot=classes,
class_weights=weights,
keypoint_weights=kp_weights)
num_instances, num_keypoints, _ = (
shape_utils.combined_static_and_dynamic_shape(keypoints_absolute))
# If boxes are provided, compute the joint center from it.
if boxes is not None:
# Compute joint center from boxes.
boxes = box_list.BoxList(boxes)
boxes = box_list_ops.to_absolute_coordinates(boxes,
height // self._stride,
width // self._stride)
y_center, x_center, _, _ = boxes.get_center_coordinates_and_sizes()
else:
# TODO(yuhuic): Add the logic to generate object centers from keypoints.
raise NotImplementedError((
'The functionality of generating object centers from keypoints is'
' not implemented yet. Please provide groundtruth bounding boxes.'
))
# Tile the yx center coordinates to be the same shape as keypoints.
y_center_tiled = tf.tile(
tf.reshape(y_center, shape=[num_instances, 1]),
multiples=[1, num_keypoints])
x_center_tiled = tf.tile(
tf.reshape(x_center, shape=[num_instances, 1]),
multiples=[1, num_keypoints])
# [num_instance * num_keypoints, num_neighbors]
(y_source_neighbors, x_source_neighbors,
valid_sources) = ta_utils.get_surrounding_grids(
height // self._stride, width // self._stride,
tf.keras.backend.flatten(y_center_tiled),
tf.keras.backend.flatten(x_center_tiled), self._peak_radius)
_, num_neighbors = shape_utils.combined_static_and_dynamic_shape(
y_source_neighbors)
valid_keypoints = tf.cast(
valid_sources, dtype=tf.float32) * tf.stack(
[tf.keras.backend.flatten(kp_weights)] * num_neighbors, axis=-1)
# Compute the offsets and indices of the box centers. Shape:
# offsets: [num_instances * num_keypoints, 2]
# indices: [num_instances * num_keypoints, 2]
(offsets, indices) = ta_utils.compute_floor_offsets_with_indices(
y_source=y_source_neighbors,
x_source=x_source_neighbors,
y_target=tf.keras.backend.flatten(keypoints_absolute[:, :, 0]),
x_target=tf.keras.backend.flatten(keypoints_absolute[:, :, 1]))
# Reshape to:
# offsets: [num_instances * num_keypoints * num_neighbors, 2]
# indices: [num_instances * num_keypoints * num_neighbors, 2]
offsets = tf.reshape(offsets, [-1, 2])
indices = tf.reshape(indices, [-1, 2])
# keypoint type tensor: [num_instances, num_keypoints, num_neighbors].
tiled_keypoint_types = self._get_keypoint_types(
num_instances, num_keypoints, num_neighbors)
batch_index = tf.fill(
[num_instances * num_keypoints * num_neighbors, 1], i)
batch_indices.append(
tf.concat([batch_index, indices,
tf.reshape(tiled_keypoint_types, [-1, 1])], axis=1))
batch_offsets.append(offsets)
batch_weights.append(tf.keras.backend.flatten(valid_keypoints))
# Concatenate the tensors in the batch in the first dimension:
# shape: [batch_size * num_instances * num_keypoints, 4]
batch_indices = tf.concat(batch_indices, axis=0)
# shape: [batch_size * num_instances * num_keypoints]
batch_weights = tf.concat(batch_weights, axis=0)
# shape: [batch_size * num_instances * num_keypoints, 2]
batch_offsets = tf.concat(batch_offsets, axis=0)
return (batch_indices, batch_offsets, batch_weights)
class CenterNetMaskTargetAssigner(object):
"""Wrapper to compute targets for segmentation masks."""
def __init__(self, stride):
self._stride = stride
def assign_segmentation_targets(
self, gt_masks_list, gt_classes_list,
mask_resize_method=ResizeMethod.BILINEAR):
"""Computes the segmentation targets.
This utility produces a semantic segmentation mask for each class, starting
with whole image instance segmentation masks. Effectively, each per-class
segmentation target is the union of all masks from that class.
Args:
gt_masks_list: A list of float tensors with shape [num_boxes,
input_height, input_width] with values in {0, 1} representing instance
masks for each object.
gt_classes_list: A list of float tensors with shape [num_boxes,
num_classes] representing the one-hot encoded class labels for each box
in the gt_masks_list.
mask_resize_method: A `tf.compat.v2.image.ResizeMethod`. The method to use
when resizing masks from input resolution to output resolution.
Returns:
segmentation_targets: A float tensor of size [batch_size, output_height,
output_width, num_classes] representing the class of each location in
the output space.
"""
# TODO(ronnyvotel): Handle groundtruth weights.
_, num_classes = shape_utils.combined_static_and_dynamic_shape(
gt_classes_list[0])
_, input_height, input_width = (
shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0]))
output_height = input_height // self._stride
output_width = input_width // self._stride
segmentation_targets_list = []
for gt_masks, gt_classes in zip(gt_masks_list, gt_classes_list):
# Resize segmentation masks to conform to output dimensions. Use TF2
# image resize because TF1's version is buggy:
# https://yaqs.corp.google.com/eng/q/4970450458378240
gt_masks = tf2.image.resize(
gt_masks[:, :, :, tf.newaxis],
size=(output_height, output_width),
method=mask_resize_method)
gt_classes_reshaped = tf.reshape(gt_classes, [-1, 1, 1, num_classes])
# Shape: [h, w, num_classes].
segmentations_for_image = tf.reduce_max(
gt_masks * gt_classes_reshaped, axis=0)
segmentation_targets_list.append(segmentations_for_image)
segmentation_target = tf.stack(segmentation_targets_list, axis=0)
return segmentation_target
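# Usage sketch (shapes are illustrative): two full-image instance masks on a
# 32 x 32 input with stride 4 collapse into an 8 x 8 per-class map.
mask_assigner = CenterNetMaskTargetAssigner(stride=4)
example_masks = [tf.ones([2, 32, 32], dtype=tf.float32)]
example_mask_classes = [tf.one_hot([0, 2], depth=3)]
segmentation_targets = mask_assigner.assign_segmentation_targets(
    example_masks, example_mask_classes)                        # [1, 8, 8, 3]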
......@@ -24,9 +24,9 @@ from object_detection.core import region_similarity_calculator
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner as targetassigner
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
from object_detection.utils import np_box_ops
from object_detection.utils import test_case
from object_detection.utils import tf_version
class TargetAssignerTest(test_case.TestCase):
......@@ -439,7 +439,7 @@ class TargetAssignerTest(test_case.TestCase):
def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
matcher = argmax_matcher.ArgMaxMatcher(0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_class_label = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
......@@ -469,7 +469,7 @@ class TargetAssignerTest(test_case.TestCase):
def test_raises_error_on_invalid_groundtruth_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
matcher = argmax_matcher.ArgMaxMatcher(0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=1.0)
unmatched_class_label = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
target_assigner = targetassigner.TargetAssigner(
......@@ -1191,7 +1191,7 @@ class BatchTargetAssignConfidencesTest(test_case.TestCase):
])
class CreateTargetAssignerTest(tf.test.TestCase):
class CreateTargetAssignerTest(test_case.TestCase):
def test_create_target_assigner(self):
"""Tests that named constructor gives working target assigners.
......@@ -1202,9 +1202,10 @@ class CreateTargetAssignerTest(tf.test.TestCase):
groundtruth = box_list.BoxList(tf.constant(corners))
priors = box_list.BoxList(tf.constant(corners))
multibox_ta = (targetassigner
.create_target_assigner('Multibox', stage='proposal'))
multibox_ta.assign(priors, groundtruth)
if tf_version.is_tf1():
multibox_ta = (targetassigner
.create_target_assigner('Multibox', stage='proposal'))
multibox_ta.assign(priors, groundtruth)
# No tests on output, as that may vary arbitrarily as new target assigners
# are added. As long as it is constructed correctly and runs without errors,
# tests on the individual assigners cover correctness of the assignments.
......@@ -1229,6 +1230,681 @@ class CreateTargetAssignerTest(tf.test.TestCase):
stage='invalid_stage')
def _array_argmax(array):
return np.unravel_index(np.argmax(array), array.shape)
class CenterNetCenterHeatmapTargetAssignerTest(test_case.TestCase):
def setUp(self):
super(CenterNetCenterHeatmapTargetAssignerTest, self).setUp()
self._box_center = [0.0, 0.0, 1.0, 1.0]
self._box_center_small = [0.25, 0.25, 0.75, 0.75]
self._box_lower_left = [0.5, 0.0, 1.0, 0.5]
self._box_center_offset = [0.1, 0.05, 1.0, 1.0]
self._box_odd_coordinates = [0.1625, 0.2125, 0.5625, 0.9625]
def test_center_location(self):
"""Test that the centers are at the correct location."""
def graph_fn():
box_batch = [tf.constant([self._box_center, self._box_lower_left])]
classes = [
tf.one_hot([0, 1], depth=4),
]
assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
classes)
return targets
targets = self.execute(graph_fn, [])
self.assertEqual((10, 10), _array_argmax(targets[0, :, :, 0]))
self.assertAlmostEqual(1.0, targets[0, 10, 10, 0])
self.assertEqual((15, 5), _array_argmax(targets[0, :, :, 1]))
self.assertAlmostEqual(1.0, targets[0, 15, 5, 1])
def test_center_batch_shape(self):
"""Test that the shape of the target for a batch is correct."""
def graph_fn():
box_batch = [
tf.constant([self._box_center, self._box_lower_left]),
tf.constant([self._box_center]),
tf.constant([self._box_center_small]),
]
classes = [
tf.one_hot([0, 1], depth=4),
tf.one_hot([2], depth=4),
tf.one_hot([3], depth=4),
]
assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
classes)
return targets
targets = self.execute(graph_fn, [])
self.assertEqual((3, 20, 20, 4), targets.shape)
def test_center_overlap_maximum(self):
"""Test that when boxes overlap we, are computing the maximum."""
def graph_fn():
box_batch = [
tf.constant([
self._box_center, self._box_center_offset, self._box_center,
self._box_center_offset
])
]
classes = [
tf.one_hot([0, 0, 1, 2], depth=4),
]
assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
classes)
return targets
targets = self.execute(graph_fn, [])
class0_targets = targets[0, :, :, 0]
class1_targets = targets[0, :, :, 1]
class2_targets = targets[0, :, :, 2]
np.testing.assert_allclose(class0_targets,
np.maximum(class1_targets, class2_targets))
def test_size_blur(self):
"""Test that the heatmap of a larger box is more blurred."""
def graph_fn():
box_batch = [tf.constant([self._box_center, self._box_center_small])]
classes = [
tf.one_hot([0, 1], depth=4),
]
assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
classes)
return targets
targets = self.execute(graph_fn, [])
self.assertGreater(
np.count_nonzero(targets[:, :, :, 0]),
np.count_nonzero(targets[:, :, :, 1]))
def test_weights(self):
"""Test that the weights correctly ignore ground truth."""
def graph1_fn():
box_batch = [
tf.constant([self._box_center, self._box_lower_left]),
tf.constant([self._box_center]),
tf.constant([self._box_center_small]),
]
classes = [
tf.one_hot([0, 1], depth=4),
tf.one_hot([2], depth=4),
tf.one_hot([3], depth=4),
]
assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
classes)
return targets
targets = self.execute(graph1_fn, [])
self.assertAlmostEqual(1.0, targets[0, :, :, 0].max())
self.assertAlmostEqual(1.0, targets[0, :, :, 1].max())
self.assertAlmostEqual(1.0, targets[1, :, :, 2].max())
self.assertAlmostEqual(1.0, targets[2, :, :, 3].max())
self.assertAlmostEqual(0.0, targets[0, :, :, [2, 3]].max())
self.assertAlmostEqual(0.0, targets[1, :, :, [0, 1, 3]].max())
self.assertAlmostEqual(0.0, targets[2, :, :, :3].max())
def graph2_fn():
weights = [
tf.constant([0., 1.]),
tf.constant([1.]),
tf.constant([1.]),
]
box_batch = [
tf.constant([self._box_center, self._box_lower_left]),
tf.constant([self._box_center]),
tf.constant([self._box_center_small]),
]
classes = [
tf.one_hot([0, 1], depth=4),
tf.one_hot([2], depth=4),
tf.one_hot([3], depth=4),
]
assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
classes,
weights)
return targets
targets = self.execute(graph2_fn, [])
self.assertAlmostEqual(1.0, targets[0, :, :, 1].max())
self.assertAlmostEqual(1.0, targets[1, :, :, 2].max())
self.assertAlmostEqual(1.0, targets[2, :, :, 3].max())
self.assertAlmostEqual(0.0, targets[0, :, :, [0, 2, 3]].max())
self.assertAlmostEqual(0.0, targets[1, :, :, [0, 1, 3]].max())
self.assertAlmostEqual(0.0, targets[2, :, :, :3].max())
def test_low_overlap(self):
def graph1_fn():
box_batch = [tf.constant([self._box_center])]
classes = [
tf.one_hot([0], depth=2),
]
assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(
4, min_overlap=0.1)
targets_low_overlap = assigner.assign_center_targets_from_boxes(
80, 80, box_batch, classes)
return targets_low_overlap
targets_low_overlap = self.execute(graph1_fn, [])
self.assertLess(1, np.count_nonzero(targets_low_overlap))
def graph2_fn():
box_batch = [tf.constant([self._box_center])]
classes = [
tf.one_hot([0], depth=2),
]
assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(
4, min_overlap=0.6)
targets_medium_overlap = assigner.assign_center_targets_from_boxes(
80, 80, box_batch, classes)
return targets_medium_overlap
targets_medium_overlap = self.execute(graph2_fn, [])
self.assertLess(1, np.count_nonzero(targets_medium_overlap))
def graph3_fn():
box_batch = [tf.constant([self._box_center])]
classes = [
tf.one_hot([0], depth=2),
]
assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(
4, min_overlap=0.99)
targets_high_overlap = assigner.assign_center_targets_from_boxes(
80, 80, box_batch, classes)
return targets_high_overlap
targets_high_overlap = self.execute(graph3_fn, [])
self.assertTrue(np.all(targets_low_overlap >= targets_medium_overlap))
self.assertTrue(np.all(targets_medium_overlap >= targets_high_overlap))
def test_empty_box_list(self):
"""Test that an empty box list gives an all 0 heatmap."""
def graph_fn():
box_batch = [
tf.zeros((0, 4), dtype=tf.float32),
]
classes = [
tf.zeros((0, 5), dtype=tf.float32),
]
assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(
4, min_overlap=0.1)
targets = assigner.assign_center_targets_from_boxes(
80, 80, box_batch, classes)
return targets
targets = self.execute(graph_fn, [])
np.testing.assert_allclose(targets, 0.)
class CenterNetBoxTargetAssignerTest(test_case.TestCase):
def setUp(self):
super(CenterNetBoxTargetAssignerTest, self).setUp()
self._box_center = [0.0, 0.0, 1.0, 1.0]
self._box_center_small = [0.25, 0.25, 0.75, 0.75]
self._box_lower_left = [0.5, 0.0, 1.0, 0.5]
self._box_center_offset = [0.1, 0.05, 1.0, 1.0]
self._box_odd_coordinates = [0.1625, 0.2125, 0.5625, 0.9625]
def test_max_distance_for_overlap(self):
"""Test that the distance ensures the IoU with random boxes."""
# TODO(vighneshb) remove this after the `_smallest_positive_root`
# function is fixed.
self.skipTest(('Skipping test because we are using an incorrect version of'
' the `max_distance_for_overlap` function to reproduce'
' results.'))
rng = np.random.RandomState(0)
n_samples = 100
width = rng.uniform(1, 100, size=n_samples)
height = rng.uniform(1, 100, size=n_samples)
min_iou = rng.uniform(0.1, 1.0, size=n_samples)
def graph_fn():
max_dist = targetassigner.max_distance_for_overlap(height, width, min_iou)
return max_dist
max_dist = self.execute(graph_fn, [])
xmin1 = np.zeros(n_samples)
ymin1 = np.zeros(n_samples)
xmax1 = np.zeros(n_samples) + width
ymax1 = np.zeros(n_samples) + height
xmin2 = max_dist * np.cos(rng.uniform(0, 2 * np.pi))
ymin2 = max_dist * np.sin(rng.uniform(0, 2 * np.pi))
xmax2 = width + max_dist * np.cos(rng.uniform(0, 2 * np.pi))
ymax2 = height + max_dist * np.sin(rng.uniform(0, 2 * np.pi))
boxes1 = np.vstack([ymin1, xmin1, ymax1, xmax1]).T
boxes2 = np.vstack([ymin2, xmin2, ymax2, xmax2]).T
iou = np.diag(np_box_ops.iou(boxes1, boxes2))
self.assertTrue(np.all(iou >= min_iou))
def test_max_distance_for_overlap_centernet(self):
"""Test the version of the function used in the CenterNet paper."""
def graph_fn():
distance = targetassigner.max_distance_for_overlap(10, 5, 0.5)
return distance
distance = self.execute(graph_fn, [])
self.assertAlmostEqual(2.807764064, distance)
def test_assign_size_and_offset_targets(self):
"""Test the assign_size_and_offset_targets function."""
def graph_fn():
box_batch = [
tf.constant([self._box_center, self._box_lower_left]),
tf.constant([self._box_center_offset]),
tf.constant([self._box_center_small, self._box_odd_coordinates]),
]
assigner = targetassigner.CenterNetBoxTargetAssigner(4)
indices, hw, yx_offset, weights = assigner.assign_size_and_offset_targets(
80, 80, box_batch)
return indices, hw, yx_offset, weights
indices, hw, yx_offset, weights = self.execute(graph_fn, [])
self.assertEqual(indices.shape, (5, 3))
self.assertEqual(hw.shape, (5, 2))
self.assertEqual(yx_offset.shape, (5, 2))
self.assertEqual(weights.shape, (5,))
np.testing.assert_array_equal(
indices,
[[0, 10, 10], [0, 15, 5], [1, 11, 10], [2, 10, 10], [2, 7, 11]])
np.testing.assert_array_equal(
hw, [[20, 20], [10, 10], [18, 19], [10, 10], [8, 15]])
np.testing.assert_array_equal(
yx_offset, [[0, 0], [0, 0], [0, 0.5], [0, 0], [0.25, 0.75]])
np.testing.assert_array_equal(weights, 1)
def test_assign_size_and_offset_targets_weights(self):
"""Test the assign_size_and_offset_targets function with box weights."""
def graph_fn():
box_batch = [
tf.constant([self._box_center, self._box_lower_left]),
tf.constant([self._box_lower_left, self._box_center_small]),
tf.constant([self._box_center_small, self._box_odd_coordinates]),
]
cn_assigner = targetassigner.CenterNetBoxTargetAssigner(4)
weights_batch = [
tf.constant([0.0, 1.0]),
tf.constant([1.0, 1.0]),
tf.constant([0.0, 0.0])
]
indices, hw, yx_offset, weights = cn_assigner.assign_size_and_offset_targets(
80, 80, box_batch, weights_batch)
return indices, hw, yx_offset, weights
indices, hw, yx_offset, weights = self.execute(graph_fn, [])
self.assertEqual(indices.shape, (6, 3))
self.assertEqual(hw.shape, (6, 2))
self.assertEqual(yx_offset.shape, (6, 2))
self.assertEqual(weights.shape, (6,))
np.testing.assert_array_equal(indices,
[[0, 10, 10], [0, 15, 5], [1, 15, 5],
[1, 10, 10], [2, 10, 10], [2, 7, 11]])
np.testing.assert_array_equal(
hw, [[20, 20], [10, 10], [10, 10], [10, 10], [10, 10], [8, 15]])
np.testing.assert_array_equal(
yx_offset, [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0.25, 0.75]])
np.testing.assert_array_equal(weights, [0, 1, 1, 1, 0, 0])
def test_get_batch_predictions_from_indices(self):
"""Test the get_batch_predictions_from_indices function.
This test verifies that the indices returned by
assign_size_and_offset_targets function work as expected with a predicted
tensor.
"""
def graph_fn():
box_batch = [
tf.constant([self._box_center, self._box_lower_left]),
tf.constant([self._box_center_small, self._box_odd_coordinates]),
]
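      # The (y, x) locations below are the stride-4 output cells that contain
      # each box's center (e.g. the full box centered at (80, 40) maps to cell
      # (20, 10)), so the gathered predictions should come back in box order.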
pred_array = np.ones((2, 40, 20, 2), dtype=np.int32) * -1000
pred_array[0, 20, 10] = [1, 2]
pred_array[0, 30, 5] = [3, 4]
pred_array[1, 20, 10] = [5, 6]
pred_array[1, 14, 11] = [7, 8]
pred_tensor = tf.constant(pred_array)
cn_assigner = targetassigner.CenterNetBoxTargetAssigner(4)
indices, _, _, _ = cn_assigner.assign_size_and_offset_targets(
160, 80, box_batch)
preds = targetassigner.get_batch_predictions_from_indices(
pred_tensor, indices)
return preds
preds = self.execute(graph_fn, [])
np.testing.assert_array_equal(preds, [[1, 2], [3, 4], [5, 6], [7, 8]])
class CenterNetKeypointTargetAssignerTest(test_case.TestCase):
def test_keypoint_heatmap_targets(self):
def graph_fn():
gt_classes_list = [
tf.one_hot([0, 1, 0, 1], depth=4),
]
coordinates = tf.expand_dims(
tf.constant(
np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
[float('nan'), 0.7, float('nan'), 0.9, 1.0],
[0.4, 0.1, 0.4, 0.2, 0.1],
[float('nan'), 0.1, 0.5, 0.7, 0.6]]),
dtype=tf.float32),
axis=2)
gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)]
gt_boxes_list = [
tf.constant(
np.array([[0.0, 0.0, 0.3, 0.3],
[0.0, 0.0, 0.5, 0.5],
[0.0, 0.0, 0.5, 0.5],
[0.0, 0.0, 1.0, 1.0]]),
dtype=tf.float32)
]
cn_assigner = targetassigner.CenterNetKeypointTargetAssigner(
stride=4,
class_id=1,
keypoint_indices=[0, 2])
(targets, num_instances_batch,
valid_mask) = cn_assigner.assign_keypoint_heatmap_targets(
120,
80,
gt_keypoints_list,
gt_classes_list,
gt_boxes_list=gt_boxes_list)
return targets, num_instances_batch, valid_mask
targets, num_instances_batch, valid_mask = self.execute(graph_fn, [])
# keypoint (0.5, 0.5) is selected. The peak is expected to appear at the
# center of the image.
self.assertEqual((15, 10), _array_argmax(targets[0, :, :, 1]))
self.assertAlmostEqual(1.0, targets[0, 15, 10, 1])
    # No peak for the first keypoint channel since its keypoint values are NaN.
self.assertAlmostEqual(0.0, targets[0, 15, 10, 0])
# Verify the output heatmap shape.
self.assertAllEqual([1, 30, 20, 2], targets.shape)
# Verify the number of instances is correct.
np.testing.assert_array_almost_equal([[0, 1]],
num_instances_batch)
    # When calling the function, we specify the class id to be 1 (the 2nd and
    # 4th instances) and the keypoint indices to be [0, 2]. The 2nd instance
    # belongs to the target class but has no valid keypoints, so the region of
    # its bounding box (0.0, 0.0, 0.5, 0.5) should be blacked out, which maps
    # to (0, 0, 15, 10) in the absolute output space.
self.assertAlmostEqual(np.sum(valid_mask[:, 0:16, 0:11]), 0.0)
# All other values are 1.0 so the sum is: 30 * 20 - 16 * 11 = 424.
self.assertAlmostEqual(np.sum(valid_mask), 424.0)
def test_assign_keypoints_offset_targets(self):
def graph_fn():
gt_classes_list = [
tf.one_hot([0, 1, 0, 1], depth=4),
]
coordinates = tf.expand_dims(
tf.constant(
np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
[float('nan'), 0.7, float('nan'), 0.9, 0.4],
[0.4, 0.1, 0.4, 0.2, 0.0],
[float('nan'), 0.0, 0.12, 0.7, 0.4]]),
dtype=tf.float32),
axis=2)
gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)]
cn_assigner = targetassigner.CenterNetKeypointTargetAssigner(
stride=4,
class_id=1,
keypoint_indices=[0, 2])
(indices, offsets, weights) = cn_assigner.assign_keypoints_offset_targets(
height=120,
width=80,
gt_keypoints_list=gt_keypoints_list,
gt_classes_list=gt_classes_list)
return indices, weights, offsets
indices, weights, offsets = self.execute(graph_fn, [])
# Only the last element has positive weight.
np.testing.assert_array_almost_equal(
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], weights)
# Validate the last element's indices and offsets.
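    # The only valid keypoint is 0.12 (4th instance, keypoint index 2):
    # 0.12 * 120 / 4 = 3.6 and 0.12 * 80 / 4 = 2.4, giving cell (3, 2) with
    # offsets (0.6, 0.4).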
np.testing.assert_array_equal([0, 3, 2], indices[7, :])
np.testing.assert_array_almost_equal([0.6, 0.4], offsets[7, :])
def test_assign_keypoints_offset_targets_radius(self):
def graph_fn():
gt_classes_list = [
tf.one_hot([0, 1, 0, 1], depth=4),
]
coordinates = tf.expand_dims(
tf.constant(
np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
[float('nan'), 0.7, float('nan'), 0.9, 0.4],
[0.4, 0.1, 0.4, 0.2, 0.0],
[float('nan'), 0.0, 0.12, 0.7, 0.4]]),
dtype=tf.float32),
axis=2)
gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)]
cn_assigner = targetassigner.CenterNetKeypointTargetAssigner(
stride=4,
class_id=1,
keypoint_indices=[0, 2],
peak_radius=1,
per_keypoint_offset=True)
(indices, offsets, weights) = cn_assigner.assign_keypoints_offset_targets(
height=120,
width=80,
gt_keypoints_list=gt_keypoints_list,
gt_classes_list=gt_classes_list)
return indices, weights, offsets
indices, weights, offsets = self.execute(graph_fn, [])
    # There are a total of 8 * 5 (neighbors) = 40 targets.
self.assertAllEqual(indices.shape, [40, 4])
self.assertAllEqual(offsets.shape, [40, 2])
self.assertAllEqual(weights.shape, [40])
    # Only the last 5 elements (radius 1 generates 5 valid points) have
    # positive weights.
np.testing.assert_array_almost_equal([
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0
], weights)
# Validate the last element's (with neighbors) indices and offsets.
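    # Each neighbor's offset points back to the true keypoint location
    # (3.6, 2.4); e.g. cell (2, 2) stores (3.6 - 2, 2.4 - 2) = (1.6, 0.4).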
np.testing.assert_array_equal([0, 2, 2, 1], indices[35, :])
np.testing.assert_array_equal([0, 3, 1, 1], indices[36, :])
np.testing.assert_array_equal([0, 3, 2, 1], indices[37, :])
np.testing.assert_array_equal([0, 3, 3, 1], indices[38, :])
np.testing.assert_array_equal([0, 4, 2, 1], indices[39, :])
np.testing.assert_array_almost_equal([1.6, 0.4], offsets[35, :])
np.testing.assert_array_almost_equal([0.6, 1.4], offsets[36, :])
np.testing.assert_array_almost_equal([0.6, 0.4], offsets[37, :])
np.testing.assert_array_almost_equal([0.6, -0.6], offsets[38, :])
np.testing.assert_array_almost_equal([-0.4, 0.4], offsets[39, :])
def test_assign_joint_regression_targets(self):
def graph_fn():
gt_boxes_list = [
tf.constant(
np.array([[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 1.0, 1.0]]),
dtype=tf.float32)
]
gt_classes_list = [
tf.one_hot([0, 1, 0, 1], depth=4),
]
coordinates = tf.expand_dims(
tf.constant(
np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
[float('nan'), 0.7, float('nan'), 0.9, 0.4],
[0.4, 0.1, 0.4, 0.2, 0.0],
[float('nan'), 0.0, 0.12, 0.7, 0.4]]),
dtype=tf.float32),
axis=2)
gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)]
cn_assigner = targetassigner.CenterNetKeypointTargetAssigner(
stride=4,
class_id=1,
keypoint_indices=[0, 2])
(indices, offsets, weights) = cn_assigner.assign_joint_regression_targets(
height=120,
width=80,
gt_keypoints_list=gt_keypoints_list,
gt_classes_list=gt_classes_list,
gt_boxes_list=gt_boxes_list)
return indices, offsets, weights
indices, offsets, weights = self.execute(graph_fn, [])
np.testing.assert_array_almost_equal(
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], weights)
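    # The regression target is stored at the object's center cell (15, 10) and
    # points to the keypoint at (3.6, 2.4), i.e. offsets (-11.4, -7.6).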
np.testing.assert_array_equal([0, 15, 10, 1], indices[7, :])
np.testing.assert_array_almost_equal([-11.4, -7.6], offsets[7, :])
def test_assign_joint_regression_targets_radius(self):
def graph_fn():
gt_boxes_list = [
tf.constant(
np.array([[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 1.0, 1.0]]),
dtype=tf.float32)
]
gt_classes_list = [
tf.one_hot([0, 1, 0, 1], depth=4),
]
coordinates = tf.expand_dims(
tf.constant(
np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
[float('nan'), 0.7, float('nan'), 0.9, 0.4],
[0.4, 0.1, 0.4, 0.2, 0.0],
[float('nan'), 0.0, 0.12, 0.7, 0.4]]),
dtype=tf.float32),
axis=2)
gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)]
cn_assigner = targetassigner.CenterNetKeypointTargetAssigner(
stride=4,
class_id=1,
keypoint_indices=[0, 2],
peak_radius=1)
(indices, offsets, weights) = cn_assigner.assign_joint_regression_targets(
height=120,
width=80,
gt_keypoints_list=gt_keypoints_list,
gt_classes_list=gt_classes_list,
gt_boxes_list=gt_boxes_list)
return indices, offsets, weights
indices, offsets, weights = self.execute(graph_fn, [])
    # There are a total of 8 * 5 (neighbors) = 40 targets.
self.assertAllEqual(indices.shape, [40, 4])
self.assertAllEqual(offsets.shape, [40, 2])
self.assertAllEqual(weights.shape, [40])
    # Only the last 5 elements (radius 1 generates 5 valid points) have
    # positive weights.
np.testing.assert_array_almost_equal([
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0
], weights)
# Test the values of the indices and offsets of the last 5 elements.
np.testing.assert_array_equal([0, 14, 10, 1], indices[35, :])
np.testing.assert_array_equal([0, 15, 9, 1], indices[36, :])
np.testing.assert_array_equal([0, 15, 10, 1], indices[37, :])
np.testing.assert_array_equal([0, 15, 11, 1], indices[38, :])
np.testing.assert_array_equal([0, 16, 10, 1], indices[39, :])
np.testing.assert_array_almost_equal([-10.4, -7.6], offsets[35, :])
np.testing.assert_array_almost_equal([-11.4, -6.6], offsets[36, :])
np.testing.assert_array_almost_equal([-11.4, -7.6], offsets[37, :])
np.testing.assert_array_almost_equal([-11.4, -8.6], offsets[38, :])
np.testing.assert_array_almost_equal([-12.4, -7.6], offsets[39, :])
class CenterNetMaskTargetAssignerTest(test_case.TestCase):
def test_assign_segmentation_targets(self):
def graph_fn():
gt_masks_list = [
# Example 0.
tf.constant([
[
[1., 0., 0., 0.],
[1., 1., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
],
[
[0., 0., 0., 0.],
[0., 0., 0., 1.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
],
[
[1., 1., 0., 0.],
[1., 1., 0., 0.],
[0., 0., 1., 1.],
[0., 0., 1., 1.],
]
], dtype=tf.float32),
# Example 1.
tf.constant([
[
[1., 1., 0., 1.],
[1., 1., 1., 1.],
[0., 0., 1., 1.],
[0., 0., 0., 1.],
],
[
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[1., 1., 0., 0.],
[1., 1., 0., 0.],
],
], dtype=tf.float32),
]
gt_classes_list = [
# Example 0.
tf.constant([[1., 0., 0.],
[0., 1., 0.],
[1., 0., 0.]], dtype=tf.float32),
# Example 1.
tf.constant([[0., 1., 0.],
[0., 1., 0.]], dtype=tf.float32)
]
cn_assigner = targetassigner.CenterNetMaskTargetAssigner(stride=2)
segmentation_target = cn_assigner.assign_segmentation_targets(
gt_masks_list=gt_masks_list,
gt_classes_list=gt_classes_list,
mask_resize_method=targetassigner.ResizeMethod.NEAREST_NEIGHBOR)
return segmentation_target
segmentation_target = self.execute(graph_fn, [])
expected_seg_target = np.array([
# Example 0 [[class 0, class 1], [background, class 0]]
[[[1, 0, 0], [0, 1, 0]],
[[0, 0, 0], [1, 0, 0]]],
# Example 1 [[class 1, class 1], [class 1, class 1]]
[[[0, 1, 0], [0, 1, 0]],
[[0, 1, 0], [0, 1, 0]]],
], dtype=np.float32)
np.testing.assert_array_almost_equal(
expected_seg_target, segmentation_target)
if __name__ == '__main__':
  tf.enable_v2_behavior()
  tf.test.main()
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""A Beam job to add contextual memory banks to tf.Examples.
This tool groups images containing bounding boxes and embedded context features
by a key, either `image/location` or `image/seq_id`, and time horizon,
then uses these groups to build up a contextual memory bank from the embedded
context features from each image in the group and adds that context to the
output tf.Examples for each image in the group.
Steps to generate a dataset with context from one with bounding boxes and
embedded context features:
1. Use object_detection/export_inference_graph.py to get a `saved_model` for
inference. The input node must accept a tf.Example proto.
2. Run this tool with `saved_model` from step 1 and a TFRecord of tf.Example
protos containing images, bounding boxes, and embedded context features.
The context features can be added to tf.Examples using
generate_embedding_data.py.
Example Usage:
--------------
python add_context_to_examples.py \
--input_tfrecord path/to/input_tfrecords* \
--output_tfrecord path/to/output_tfrecords \
--sequence_key image/location \
--time_horizon month
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import datetime
import io
import itertools
import json
import os
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import PIL.Image
import six
import tensorflow as tf
from apache_beam import runners
flags.DEFINE_string('input_tfrecord', None, 'TFRecord containing images in '
                    'tf.Example format for object detection, with bounding '
                    'boxes and contextual feature embeddings.')
flags.DEFINE_string('output_tfrecord', None,
'TFRecord containing images in tf.Example format, with '
'added contextual memory banks.')
flags.DEFINE_string('sequence_key', None, 'Key to use when grouping sequences: '
'so far supports `image/seq_id` and `image/location`.')
flags.DEFINE_string('time_horizon', None, 'What time horizon to use when '
'splitting the data, if any. Options are: `year`, `month`,'
' `week`, `day `, `hour`, `minute`, `None`.')
flags.DEFINE_integer('subsample_context_features_rate', 0, 'Whether to '
'subsample the context_features, and if so how many to '
'sample. If the rate is set to X, it will sample context '
'from 1 out of every X images. Default is sampling from '
'every image, which is X=0.')
flags.DEFINE_boolean('reduce_image_size', True, 'downsamples images to '
                     'have longest side max_image_dimension, maintaining '
                     'aspect ratio')
flags.DEFINE_integer('max_image_dimension', 1024, 'sets max image dimension')
flags.DEFINE_boolean('add_context_features', True, 'adds a memory bank of '
                     'embeddings to each clip')
flags.DEFINE_boolean('sorted_image_ids', True, 'whether the image source_ids '
'are sortable to deal with date_captured tie-breaks')
flags.DEFINE_string('image_ids_to_keep', 'All', 'path to .json list of image '
                    'ids to keep, used for ground truth eval creation')
flags.DEFINE_boolean('keep_context_features_image_id_list', False, 'Whether or '
'not to keep a list of the image_ids corresponding to the '
'memory bank')
flags.DEFINE_boolean('keep_only_positives', False, 'Whether or not to '
'keep only positive boxes based on score')
flags.DEFINE_boolean('keep_only_positives_gt', False, 'Whether or not to '
'keep only positive boxes based on gt class')
flags.DEFINE_float('context_features_score_threshold', 0.7, 'What score '
'threshold to use for boxes in context_features')
flags.DEFINE_integer('max_num_elements_in_context_features', 2000, 'Sets max '
'num elements per memory bank')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
flags.DEFINE_string('output_type', 'tf_sequence_example', 'Output type, one of '
'`tf_example`, `tf_sequence_example`')
flags.DEFINE_integer('max_clip_length', None, 'Max length for sequence '
'example outputs.')
FLAGS = flags.FLAGS
DEFAULT_FEATURE_LENGTH = 2057
class ReKeyDataFn(beam.DoFn):
"""Re-keys tfrecords by sequence_key.
This Beam DoFn re-keys the tfrecords by a user-defined sequence_key
"""
def __init__(self, sequence_key, time_horizon,
reduce_image_size, max_image_dimension):
"""Initialization function.
Args:
sequence_key: A feature name to use as a key for grouping sequences.
Must point to a key of type bytes_list
time_horizon: What length of time to use to partition the data when
building the memory banks. Options: `year`, `month`, `week`, `day `,
`hour`, `minute`, None
reduce_image_size: Whether to reduce the sizes of the stored images.
max_image_dimension: maximum dimension of reduced images
"""
self._sequence_key = sequence_key
if time_horizon is None or time_horizon in {'year', 'month', 'week', 'day',
'hour', 'minute'}:
self._time_horizon = time_horizon
else:
raise ValueError('Time horizon not supported.')
self._reduce_image_size = reduce_image_size
self._max_image_dimension = max_image_dimension
self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter(
'data_rekey', 'num_tf_examples_processed')
self._num_images_resized = beam.metrics.Metrics.counter(
'data_rekey', 'num_images_resized')
self._num_images_read = beam.metrics.Metrics.counter(
'data_rekey', 'num_images_read')
self._num_images_found = beam.metrics.Metrics.counter(
        'data_rekey', 'num_images_found')
self._num_got_shape = beam.metrics.Metrics.counter(
'data_rekey', 'num_images_got_shape')
self._num_images_found_size = beam.metrics.Metrics.counter(
'data_rekey', 'num_images_found_size')
self._num_examples_cleared = beam.metrics.Metrics.counter(
'data_rekey', 'num_examples_cleared')
self._num_examples_updated = beam.metrics.Metrics.counter(
'data_rekey', 'num_examples_updated')
def process(self, tfrecord_entry):
return self._rekey_examples(tfrecord_entry)
def _largest_size_at_most(self, height, width, largest_side):
"""Computes new shape with the largest side equal to `largest_side`.
Args:
height: an int indicating the current height.
width: an int indicating the current width.
largest_side: A python integer indicating the size of
the largest side after resize.
Returns:
new_height: an int indicating the new height.
new_width: an int indicating the new width.
"""
x_scale = float(largest_side) / float(width)
y_scale = float(largest_side) / float(height)
scale = min(x_scale, y_scale)
new_width = int(width * scale)
new_height = int(height * scale)
return new_height, new_width
def _resize_image(self, input_example):
"""Resizes the image within input_example and updates the height and width.
Args:
input_example: A tf.Example that we want to update to contain a resized
image.
Returns:
input_example: Updated tf.Example.
"""
original_image = copy.deepcopy(
input_example.features.feature['image/encoded'].bytes_list.value[0])
self._num_images_read.inc(1)
height = copy.deepcopy(
input_example.features.feature['image/height'].int64_list.value[0])
width = copy.deepcopy(
input_example.features.feature['image/width'].int64_list.value[0])
self._num_got_shape.inc(1)
new_height, new_width = self._largest_size_at_most(
height, width, self._max_image_dimension)
self._num_images_found_size.inc(1)
encoded_jpg_io = io.BytesIO(original_image)
image = PIL.Image.open(encoded_jpg_io)
resized_image = image.resize((new_width, new_height))
with io.BytesIO() as output:
resized_image.save(output, format='JPEG')
encoded_resized_image = output.getvalue()
self._num_images_resized.inc(1)
del input_example.features.feature['image/encoded'].bytes_list.value[:]
del input_example.features.feature['image/height'].int64_list.value[:]
del input_example.features.feature['image/width'].int64_list.value[:]
self._num_examples_cleared.inc(1)
input_example.features.feature['image/encoded'].bytes_list.value.extend(
[encoded_resized_image])
input_example.features.feature['image/height'].int64_list.value.extend(
[new_height])
input_example.features.feature['image/width'].int64_list.value.extend(
[new_width])
self._num_examples_updated.inc(1)
return input_example
def _rekey_examples(self, tfrecord_entry):
serialized_example = copy.deepcopy(tfrecord_entry)
input_example = tf.train.Example.FromString(serialized_example)
self._num_images_found.inc(1)
if self._reduce_image_size:
input_example = self._resize_image(input_example)
self._num_images_resized.inc(1)
new_key = input_example.features.feature[
self._sequence_key].bytes_list.value[0]
if self._time_horizon:
date_captured = datetime.datetime.strptime(
six.ensure_str(input_example.features.feature[
'image/date_captured'].bytes_list.value[0]), '%Y-%m-%d %H:%M:%S')
year = date_captured.year
month = date_captured.month
day = date_captured.day
week = np.floor(float(day) / float(7))
hour = date_captured.hour
minute = date_captured.minute
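      # For example, with time_horizon='month', an image captured on
      # 2020-01-31 under key b'loc_1' is re-keyed to b'loc_1/2020/1'.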
if self._time_horizon == 'year':
new_key = new_key + six.ensure_binary('/' + str(year))
elif self._time_horizon == 'month':
new_key = new_key + six.ensure_binary(
'/' + str(year) + '/' + str(month))
elif self._time_horizon == 'week':
new_key = new_key + six.ensure_binary(
'/' + str(year) + '/' + str(month) + '/' + str(week))
elif self._time_horizon == 'day':
new_key = new_key + six.ensure_binary(
'/' + str(year) + '/' + str(month) + '/' + str(day))
elif self._time_horizon == 'hour':
new_key = new_key + six.ensure_binary(
'/' + str(year) + '/' + str(month) + '/' + str(day) + '/' + (
str(hour)))
elif self._time_horizon == 'minute':
new_key = new_key + six.ensure_binary(
'/' + str(year) + '/' + str(month) + '/' + str(day) + '/' + (
str(hour) + '/' + str(minute)))
self._num_examples_processed.inc(1)
return [(new_key, input_example)]
class SortGroupedDataFn(beam.DoFn):
"""Sorts data within a keyed group.
This Beam DoFn sorts the grouped list of image examples by frame_num
"""
def __init__(self, sequence_key, sorted_image_ids,
max_num_elements_in_context_features):
"""Initialization function.
Args:
sequence_key: A feature name to use as a key for grouping sequences.
Must point to a key of type bytes_list
sorted_image_ids: Whether the image ids are sortable to use as sorting
tie-breakers
max_num_elements_in_context_features: The maximum number of elements
allowed in the memory bank
"""
self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter(
'sort_group', 'num_groups_sorted')
self._too_many_elements = beam.metrics.Metrics.counter(
'sort_group', 'too_many_elements')
self._split_elements = beam.metrics.Metrics.counter(
'sort_group', 'split_elements')
self._sequence_key = six.ensure_binary(sequence_key)
self._sorted_image_ids = sorted_image_ids
self._max_num_elements_in_context_features = (
max_num_elements_in_context_features)
def process(self, grouped_entry):
return self._sort_image_examples(grouped_entry)
def _sort_image_examples(self, grouped_entry):
key, example_collection = grouped_entry
example_list = list(example_collection)
def get_frame_num(example):
return example.features.feature['image/seq_frame_num'].int64_list.value[0]
def get_date_captured(example):
return datetime.datetime.strptime(
six.ensure_str(
example.features.feature[
'image/date_captured'].bytes_list.value[0]),
'%Y-%m-%d %H:%M:%S')
def get_image_id(example):
return example.features.feature['image/source_id'].bytes_list.value[0]
if self._sequence_key == six.ensure_binary('image/seq_id'):
sorting_fn = get_frame_num
elif self._sequence_key == six.ensure_binary('image/location'):
if self._sorted_image_ids:
sorting_fn = get_image_id
else:
sorting_fn = get_date_captured
sorted_example_list = sorted(example_list, key=sorting_fn)
self._num_examples_processed.inc(1)
if len(sorted_example_list) > self._max_num_elements_in_context_features:
leftovers = sorted_example_list
output_list = []
count = 0
self._too_many_elements.inc(1)
while len(leftovers) > self._max_num_elements_in_context_features:
self._split_elements.inc(1)
new_key = key + six.ensure_binary('_' + str(count))
new_list = leftovers[:self._max_num_elements_in_context_features]
output_list.append((new_key, new_list))
        leftovers = leftovers[self._max_num_elements_in_context_features:]
count += 1
else:
output_list = [(key, sorted_example_list)]
return output_list
def get_sliding_window(example_list, max_clip_length, stride_length):
"""Yields a sliding window over data from example_list.
  Sliding window has width max_clip_length (n) and stride stride_length (m).
s -> (s0,s1,...s[n-1]), (s[m],s[m+1],...,s[m+n]), ...
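  For example, max_clip_length=2 and stride_length=2 turn
  ['a', 'b', 'c', 'd', 'e'] into ('a', 'b'), ('c', 'd'), ('e',).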
Args:
example_list: A list of examples.
max_clip_length: The maximum length of each clip.
stride_length: The stride between each clip.
Yields:
A list of lists of examples, each with length <= max_clip_length
"""
# check if the list is too short to slide over
if len(example_list) < max_clip_length:
yield example_list
else:
starting_values = [i*stride_length for i in
range(len(example_list)) if
len(example_list) > i*stride_length]
for start in starting_values:
result = tuple(itertools.islice(example_list, start,
min(start + max_clip_length,
len(example_list))))
yield result
class GenerateContextFn(beam.DoFn):
"""Generates context data for camera trap images.
This Beam DoFn builds up contextual memory banks from groups of images and
  stores them in the output tf.Example or tf.SequenceExample for each image.
"""
def __init__(self, sequence_key, add_context_features, image_ids_to_keep,
keep_context_features_image_id_list=False,
subsample_context_features_rate=0,
keep_only_positives=False,
context_features_score_threshold=0.7,
keep_only_positives_gt=False,
max_num_elements_in_context_features=5000,
pad_context_features=False,
output_type='tf_example', max_clip_length=None):
"""Initialization function.
Args:
sequence_key: A feature name to use as a key for grouping sequences.
add_context_features: Whether to keep and store the contextual memory
bank.
image_ids_to_keep: A list of image ids to save, to use to build data
subsets for evaluation.
keep_context_features_image_id_list: Whether to save an ordered list of
the ids of the images in the contextual memory bank.
subsample_context_features_rate: What rate to subsample images for the
contextual memory bank.
keep_only_positives: Whether to only keep high scoring
(>context_features_score_threshold) features in the contextual memory
bank.
context_features_score_threshold: What threshold to use for keeping
features.
keep_only_positives_gt: Whether to only keep features from images that
contain objects based on the ground truth (for training).
max_num_elements_in_context_features: the maximum number of elements in
the memory bank
pad_context_features: Whether to pad the context features to a fixed size.
      output_type: What type of output, tf_example or tf_sequence_example
      max_clip_length: The maximum length of a sequence example, before
        splitting it into multiple shorter examples.
"""
self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter(
'sequence_data_generation', 'num_seq_examples_processed')
self._num_keys_processed = beam.metrics.Metrics.counter(
'sequence_data_generation', 'num_keys_processed')
self._sequence_key = sequence_key
self._add_context_features = add_context_features
self._pad_context_features = pad_context_features
self._output_type = output_type
self._max_clip_length = max_clip_length
if six.ensure_str(image_ids_to_keep) == 'All':
self._image_ids_to_keep = None
else:
with tf.io.gfile.GFile(image_ids_to_keep) as f:
self._image_ids_to_keep = json.load(f)
self._keep_context_features_image_id_list = (
keep_context_features_image_id_list)
self._subsample_context_features_rate = subsample_context_features_rate
self._keep_only_positives = keep_only_positives
self._keep_only_positives_gt = keep_only_positives_gt
self._context_features_score_threshold = context_features_score_threshold
self._max_num_elements_in_context_features = (
max_num_elements_in_context_features)
self._images_kept = beam.metrics.Metrics.counter(
'sequence_data_generation', 'images_kept')
self._images_loaded = beam.metrics.Metrics.counter(
'sequence_data_generation', 'images_loaded')
def process(self, grouped_entry):
return self._add_context_to_example(copy.deepcopy(grouped_entry))
def _build_context_features(self, example_list):
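    """Builds the contextual memory bank for a group of examples.
    Returns a tuple of (context_features, feature_length,
    context_features_image_id_list), where context_features is a flat float
    list of the kept embeddings concatenated in order, feature_length is the
    length of a single embedding, and context_features_image_id_list holds the
    source ids of the kept examples. Each kept example records its row index
    in its 'context_features_idx' feature; skipped examples are assigned an
    out-of-range index.
    """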
context_features = []
context_features_image_id_list = []
count = 0
example_embedding = []
for idx, example in enumerate(example_list):
if self._subsample_context_features_rate > 0:
if (idx % self._subsample_context_features_rate) != 0:
example.features.feature[
'context_features_idx'].int64_list.value.append(
self._max_num_elements_in_context_features + 1)
continue
if self._keep_only_positives:
if example.features.feature[
'image/embedding_score'
].float_list.value[0] < self._context_features_score_threshold:
example.features.feature[
'context_features_idx'].int64_list.value.append(
self._max_num_elements_in_context_features + 1)
continue
if self._keep_only_positives_gt:
if len(example.features.feature[
'image/object/bbox/xmin'
].float_list.value) < 1:
example.features.feature[
'context_features_idx'].int64_list.value.append(
self._max_num_elements_in_context_features + 1)
continue
example_embedding = list(example.features.feature[
'image/embedding'].float_list.value)
context_features.extend(example_embedding)
example.features.feature[
'context_features_idx'].int64_list.value.append(count)
count += 1
example_image_id = example.features.feature[
'image/source_id'].bytes_list.value[0]
context_features_image_id_list.append(example_image_id)
if not example_embedding:
example_embedding.append(np.zeros(DEFAULT_FEATURE_LENGTH))
feature_length = DEFAULT_FEATURE_LENGTH
    # If the example_list is not empty and image/embedding_length is in the
    # feature dict, feature_length will be assigned from it. Otherwise, it
    # will keep the default value.
if example_list and (
'image/embedding_length' in example_list[0].features.feature):
feature_length = example_list[0].features.feature[
'image/embedding_length'].int64_list.value[0]
if self._pad_context_features:
while len(context_features_image_id_list) < (
self._max_num_elements_in_context_features):
context_features_image_id_list.append('')
return context_features, feature_length, context_features_image_id_list
def _add_context_to_example(self, grouped_entry):
key, example_collection = grouped_entry
list_of_examples = []
example_list = list(example_collection)
if self._add_context_features:
context_features, feature_length, context_features_image_id_list = (
self._build_context_features(example_list))
if self._image_ids_to_keep is not None:
new_example_list = []
for example in example_list:
im_id = example.features.feature['image/source_id'].bytes_list.value[0]
self._images_loaded.inc(1)
if six.ensure_str(im_id) in self._image_ids_to_keep:
self._images_kept.inc(1)
new_example_list.append(example)
if new_example_list:
example_list = new_example_list
else:
return []
if self._output_type == 'tf_sequence_example':
if self._max_clip_length is not None:
# For now, no overlap
clips = get_sliding_window(
example_list, self._max_clip_length, self._max_clip_length)
else:
clips = [example_list]
for clip_num, clip_list in enumerate(clips):
# initialize sequence example
seq_example = tf.train.SequenceExample()
        video_id = six.ensure_str(key) + '_' + str(clip_num)
seq_example.context.feature['clip/media_id'].bytes_list.value.append(
video_id.encode('utf8'))
seq_example.context.feature['clip/frames'].int64_list.value.append(
len(clip_list))
seq_example.context.feature[
'clip/start/timestamp'].int64_list.value.append(0)
seq_example.context.feature[
'clip/end/timestamp'].int64_list.value.append(len(clip_list))
seq_example.context.feature['image/format'].bytes_list.value.append(
six.ensure_binary('JPG'))
seq_example.context.feature['image/channels'].int64_list.value.append(3)
context_example = clip_list[0]
seq_example.context.feature['image/height'].int64_list.value.append(
context_example.features.feature[
'image/height'].int64_list.value[0])
seq_example.context.feature['image/width'].int64_list.value.append(
context_example.features.feature['image/width'].int64_list.value[0])
seq_example.context.feature[
'image/context_feature_length'].int64_list.value.append(
feature_length)
seq_example.context.feature[
'image/context_features'].float_list.value.extend(
context_features)
if self._keep_context_features_image_id_list:
seq_example.context.feature[
'image/context_features_image_id_list'].bytes_list.value.extend(
context_features_image_id_list)
encoded_image_list = seq_example.feature_lists.feature_list[
'image/encoded']
timestamps_list = seq_example.feature_lists.feature_list[
'image/timestamp']
context_features_idx_list = seq_example.feature_lists.feature_list[
'image/context_features_idx']
date_captured_list = seq_example.feature_lists.feature_list[
'image/date_captured']
unix_time_list = seq_example.feature_lists.feature_list[
'image/unix_time']
location_list = seq_example.feature_lists.feature_list['image/location']
image_ids_list = seq_example.feature_lists.feature_list[
'image/source_id']
gt_xmin_list = seq_example.feature_lists.feature_list[
'region/bbox/xmin']
gt_xmax_list = seq_example.feature_lists.feature_list[
'region/bbox/xmax']
gt_ymin_list = seq_example.feature_lists.feature_list[
'region/bbox/ymin']
gt_ymax_list = seq_example.feature_lists.feature_list[
'region/bbox/ymax']
gt_type_list = seq_example.feature_lists.feature_list[
'region/label/index']
gt_type_string_list = seq_example.feature_lists.feature_list[
'region/label/string']
gt_is_annotated_list = seq_example.feature_lists.feature_list[
'region/is_annotated']
for idx, example in enumerate(clip_list):
encoded_image = encoded_image_list.feature.add()
encoded_image.bytes_list.value.extend(
example.features.feature['image/encoded'].bytes_list.value)
image_id = image_ids_list.feature.add()
image_id.bytes_list.value.append(
example.features.feature['image/source_id'].bytes_list.value[0])
timestamp = timestamps_list.feature.add()
        # The timestamp is currently the example's index in the clip list.
timestamp.int64_list.value.extend([idx])
context_features_idx = context_features_idx_list.feature.add()
context_features_idx.int64_list.value.extend(
example.features.feature['context_features_idx'].int64_list.value)
date_captured = date_captured_list.feature.add()
date_captured.bytes_list.value.extend(
example.features.feature['image/date_captured'].bytes_list.value)
unix_time = unix_time_list.feature.add()
unix_time.float_list.value.extend(
example.features.feature['image/unix_time'].float_list.value)
location = location_list.feature.add()
location.bytes_list.value.extend(
example.features.feature['image/location'].bytes_list.value)
gt_xmin = gt_xmin_list.feature.add()
gt_xmax = gt_xmax_list.feature.add()
gt_ymin = gt_ymin_list.feature.add()
gt_ymax = gt_ymax_list.feature.add()
gt_type = gt_type_list.feature.add()
gt_type_str = gt_type_string_list.feature.add()
gt_is_annotated = gt_is_annotated_list.feature.add()
gt_is_annotated.int64_list.value.append(1)
gt_xmin.float_list.value.extend(
example.features.feature[
'image/object/bbox/xmin'].float_list.value)
gt_xmax.float_list.value.extend(
example.features.feature[
'image/object/bbox/xmax'].float_list.value)
gt_ymin.float_list.value.extend(
example.features.feature[
'image/object/bbox/ymin'].float_list.value)
gt_ymax.float_list.value.extend(
example.features.feature[
'image/object/bbox/ymax'].float_list.value)
gt_type.int64_list.value.extend(
example.features.feature[
'image/object/class/label'].int64_list.value)
gt_type_str.bytes_list.value.extend(
example.features.feature[
'image/object/class/text'].bytes_list.value)
self._num_examples_processed.inc(1)
list_of_examples.append(seq_example)
elif self._output_type == 'tf_example':
for example in example_list:
im_id = example.features.feature['image/source_id'].bytes_list.value[0]
if self._add_context_features:
example.features.feature[
'image/context_features'].float_list.value.extend(
context_features)
example.features.feature[
'image/context_feature_length'].int64_list.value.append(
feature_length)
if self._keep_context_features_image_id_list:
example.features.feature[
'image/context_features_image_id_list'].bytes_list.value.extend(
context_features_image_id_list)
self._num_examples_processed.inc(1)
list_of_examples.append(example)
return list_of_examples
def construct_pipeline(input_tfrecord,
output_tfrecord,
sequence_key,
time_horizon=None,
subsample_context_features_rate=0,
reduce_image_size=True,
max_image_dimension=1024,
add_context_features=True,
sorted_image_ids=True,
image_ids_to_keep='All',
keep_context_features_image_id_list=False,
keep_only_positives=False,
context_features_score_threshold=0.7,
keep_only_positives_gt=False,
max_num_elements_in_context_features=5000,
num_shards=0,
output_type='tf_example',
max_clip_length=None):
"""Returns a beam pipeline to run object detection inference.
Args:
input_tfrecord: An TFRecord of tf.train.Example protos containing images.
output_tfrecord: An TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
sequence_key: A feature name to use as a key for grouping sequences.
time_horizon: What length of time to use to partition the data when building
the memory banks. Options: `year`, `month`, `week`, `day `, `hour`,
`minute`, None.
subsample_context_features_rate: What rate to subsample images for the
contextual memory bank.
reduce_image_size: Whether to reduce the size of the stored images.
max_image_dimension: The maximum image dimension to use for resizing.
add_context_features: Whether to keep and store the contextual memory bank.
sorted_image_ids: Whether the image ids are sortable, and can be used as
datetime tie-breakers when building memory banks.
image_ids_to_keep: A list of image ids to save, to use to build data subsets
for evaluation.
keep_context_features_image_id_list: Whether to save an ordered list of the
ids of the images in the contextual memory bank.
keep_only_positives: Whether to only keep high scoring
(>context_features_score_threshold) features in the contextual memory
bank.
context_features_score_threshold: What threshold to use for keeping
features.
keep_only_positives_gt: Whether to only keep features from images that
contain objects based on the ground truth (for training).
max_num_elements_in_context_features: the maximum number of elements in the
memory bank
num_shards: The number of output shards.
    output_type: What type of output, tf_example or tf_sequence_example
    max_clip_length: The maximum length of a sequence example, before
      splitting it into multiple shorter examples.
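  Returns:
    A function that takes a Beam pipeline root and adds the pipeline stages.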
"""
def pipeline(root):
if output_type == 'tf_example':
coder = beam.coders.ProtoCoder(tf.train.Example)
elif output_type == 'tf_sequence_example':
coder = beam.coders.ProtoCoder(tf.train.SequenceExample)
else:
raise ValueError('Unsupported output type.')
input_collection = (
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
rekey_collection = input_collection | 'RekeyExamples' >> beam.ParDo(
ReKeyDataFn(sequence_key, time_horizon,
reduce_image_size, max_image_dimension))
grouped_collection = (
rekey_collection | 'GroupBySequenceKey' >> beam.GroupByKey())
grouped_collection = (
grouped_collection | 'ReshuffleGroups' >> beam.Reshuffle())
ordered_collection = (
grouped_collection | 'OrderByFrameNumber' >> beam.ParDo(
SortGroupedDataFn(sequence_key, sorted_image_ids,
max_num_elements_in_context_features)))
ordered_collection = (
ordered_collection | 'ReshuffleSortedGroups' >> beam.Reshuffle())
output_collection = (
ordered_collection | 'AddContextToExamples' >> beam.ParDo(
GenerateContextFn(
sequence_key, add_context_features, image_ids_to_keep,
keep_context_features_image_id_list=(
keep_context_features_image_id_list),
subsample_context_features_rate=subsample_context_features_rate,
keep_only_positives=keep_only_positives,
keep_only_positives_gt=keep_only_positives_gt,
context_features_score_threshold=(
context_features_score_threshold),
max_num_elements_in_context_features=(
max_num_elements_in_context_features),
output_type=output_type,
max_clip_length=max_clip_length)))
output_collection = (
output_collection | 'ReshuffleExamples' >> beam.Reshuffle())
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=coder)
return pipeline
def main(_):
"""Runs the Beam pipeline that builds context features.
Args:
_: unused
"""
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.output_tfrecord)
tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.input_tfrecord,
FLAGS.output_tfrecord,
FLAGS.sequence_key,
FLAGS.time_horizon,
FLAGS.subsample_context_features_rate,
FLAGS.reduce_image_size,
FLAGS.max_image_dimension,
FLAGS.add_context_features,
FLAGS.sorted_image_ids,
FLAGS.image_ids_to_keep,
FLAGS.keep_context_features_image_id_list,
FLAGS.keep_only_positives,
FLAGS.context_features_score_threshold,
FLAGS.keep_only_positives_gt,
FLAGS.max_num_elements_in_context_features,
FLAGS.num_shards,
FLAGS.output_type,
FLAGS.max_clip_length))
if __name__ == '__main__':
flags.mark_flags_as_required([
'input_tfrecord',
'output_tfrecord'
])
app.run(main)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for add_context_to_examples."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import datetime
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import add_context_to_examples
from object_detection.utils import tf_version
from apache_beam import runners
@contextlib.contextmanager
def InMemoryTFRecord(entries):
temp = tempfile.NamedTemporaryFile(delete=False)
filename = temp.name
try:
with tf.python_io.TFRecordWriter(filename) as writer:
for value in entries:
writer.write(value)
yield filename
finally:
os.unlink(temp.name)
def BytesFeature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def BytesListFeature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
def Int64Feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def Int64ListFeature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def FloatListFeature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class GenerateContextDataTest(tf.test.TestCase):
def _create_first_tf_example(self):
with self.test_session():
encoded_image = tf.image.encode_jpeg(
tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).eval()
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': BytesFeature(encoded_image),
'image/source_id': BytesFeature(six.ensure_binary('image_id_1')),
'image/height': Int64Feature(4),
'image/width': Int64Feature(4),
'image/object/class/label': Int64ListFeature([5, 5]),
'image/object/class/text': BytesListFeature([six.ensure_binary('hyena'),
six.ensure_binary('hyena')
]),
'image/object/bbox/xmin': FloatListFeature([0.0, 0.1]),
'image/object/bbox/xmax': FloatListFeature([0.2, 0.3]),
'image/object/bbox/ymin': FloatListFeature([0.4, 0.5]),
'image/object/bbox/ymax': FloatListFeature([0.6, 0.7]),
'image/seq_id': BytesFeature(six.ensure_binary('01')),
'image/seq_num_frames': Int64Feature(2),
'image/seq_frame_num': Int64Feature(0),
'image/date_captured': BytesFeature(
six.ensure_binary(str(datetime.datetime(2020, 1, 1, 1, 0, 0)))),
'image/embedding': FloatListFeature([0.1, 0.2, 0.3]),
'image/embedding_score': FloatListFeature([0.9]),
'image/embedding_length': Int64Feature(3)
}))
return example.SerializeToString()
def _create_second_tf_example(self):
with self.test_session():
encoded_image = tf.image.encode_jpeg(
tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).eval()
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': BytesFeature(encoded_image),
'image/source_id': BytesFeature(six.ensure_binary('image_id_2')),
'image/height': Int64Feature(4),
'image/width': Int64Feature(4),
'image/object/class/label': Int64ListFeature([5]),
'image/object/class/text': BytesListFeature([six.ensure_binary('hyena')
]),
'image/object/bbox/xmin': FloatListFeature([0.0]),
'image/object/bbox/xmax': FloatListFeature([0.1]),
'image/object/bbox/ymin': FloatListFeature([0.2]),
'image/object/bbox/ymax': FloatListFeature([0.3]),
'image/seq_id': BytesFeature(six.ensure_binary('01')),
'image/seq_num_frames': Int64Feature(2),
'image/seq_frame_num': Int64Feature(1),
'image/date_captured': BytesFeature(
six.ensure_binary(str(datetime.datetime(2020, 1, 1, 1, 1, 0)))),
'image/embedding': FloatListFeature([0.4, 0.5, 0.6]),
'image/embedding_score': FloatListFeature([0.9]),
'image/embedding_length': Int64Feature(3)
}))
return example.SerializeToString()
def assert_expected_examples(self, tf_example_list):
self.assertAllEqual(
{tf_example.features.feature['image/source_id'].bytes_list.value[0]
for tf_example in tf_example_list},
{six.ensure_binary('image_id_1'), six.ensure_binary('image_id_2')})
self.assertAllClose(
tf_example_list[0].features.feature[
'image/context_features'].float_list.value,
[0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
self.assertAllClose(
tf_example_list[1].features.feature[
'image/context_features'].float_list.value,
[0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
def assert_expected_sequence_example(self, tf_sequence_example_list):
tf_sequence_example = tf_sequence_example_list[0]
num_frames = 2
self.assertAllEqual(
tf_sequence_example.context.feature[
'clip/media_id'].bytes_list.value[0], six.ensure_binary(
'01_0'))
self.assertAllClose(
tf_sequence_example.context.feature[
'image/context_features'].float_list.value,
[0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
seq_feature_dict = tf_sequence_example.feature_lists.feature_list
self.assertLen(
seq_feature_dict['image/encoded'].feature[:],
num_frames)
actual_timestamps = [
feature.int64_list.value[0] for feature
in seq_feature_dict['image/timestamp'].feature]
timestamps = [0, 1]
self.assertAllEqual(timestamps, actual_timestamps)
# First image.
self.assertAllClose(
[0.4, 0.5],
seq_feature_dict['region/bbox/ymin'].feature[0].float_list.value[:])
self.assertAllClose(
[0.0, 0.1],
seq_feature_dict['region/bbox/xmin'].feature[0].float_list.value[:])
self.assertAllClose(
[0.6, 0.7],
seq_feature_dict['region/bbox/ymax'].feature[0].float_list.value[:])
self.assertAllClose(
[0.2, 0.3],
seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:])
self.assertAllEqual(
[six.ensure_binary('hyena'), six.ensure_binary('hyena')],
seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:])
# Second example.
self.assertAllClose(
[0.2],
seq_feature_dict['region/bbox/ymin'].feature[1].float_list.value[:])
self.assertAllClose(
[0.0],
seq_feature_dict['region/bbox/xmin'].feature[1].float_list.value[:])
self.assertAllClose(
[0.3],
seq_feature_dict['region/bbox/ymax'].feature[1].float_list.value[:])
self.assertAllClose(
[0.1],
seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:])
self.assertAllEqual(
[six.ensure_binary('hyena')],
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
def assert_expected_key(self, key):
self.assertAllEqual(key, '01')
def assert_sorted(self, example_collection):
example_list = list(example_collection)
counter = 0
for example in example_list:
frame_num = example.features.feature[
'image/seq_frame_num'].int64_list.value[0]
self.assertGreaterEqual(frame_num, counter)
counter = frame_num
def assert_context(self, example_collection):
example_list = list(example_collection)
for example in example_list:
context = example.features.feature[
'image/context_features'].float_list.value
self.assertAllClose([0.1, 0.2, 0.3, 0.4, 0.5, 0.6], context)
def assert_resized(self, example):
width = example.features.feature['image/width'].int64_list.value[0]
self.assertAllEqual(width, 2)
height = example.features.feature['image/height'].int64_list.value[0]
self.assertAllEqual(height, 2)
def assert_size(self, example):
width = example.features.feature['image/width'].int64_list.value[0]
self.assertAllEqual(width, 4)
height = example.features.feature['image/height'].int64_list.value[0]
self.assertAllEqual(height, 4)
def test_sliding_window(self):
example_list = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
max_clip_length = 3
stride_length = 3
out_list = [list(i) for i in add_context_to_examples.get_sliding_window(
example_list, max_clip_length, stride_length)]
self.assertAllEqual(out_list, [['a', 'b', 'c'],
['d', 'e', 'f'],
['g']])
def test_rekey_data_fn(self):
sequence_key = 'image/seq_id'
time_horizon = None
reduce_image_size = False
max_dim = None
rekey_fn = add_context_to_examples.ReKeyDataFn(
sequence_key, time_horizon,
reduce_image_size, max_dim)
output = rekey_fn.process(self._create_first_tf_example())
self.assert_expected_key(output[0][0])
self.assert_size(output[0][1])
def test_rekey_data_fn_w_resize(self):
sequence_key = 'image/seq_id'
time_horizon = None
reduce_image_size = True
max_dim = 2
rekey_fn = add_context_to_examples.ReKeyDataFn(
sequence_key, time_horizon,
reduce_image_size, max_dim)
output = rekey_fn.process(self._create_first_tf_example())
self.assert_expected_key(output[0][0])
self.assert_resized(output[0][1])
def test_sort_fn(self):
sequence_key = 'image/seq_id'
sorted_image_ids = False
max_num_elements_in_context_features = 10
sort_fn = add_context_to_examples.SortGroupedDataFn(
sequence_key, sorted_image_ids, max_num_elements_in_context_features)
output = sort_fn.process(
('dummy_key', [tf.train.Example.FromString(
self._create_second_tf_example()),
tf.train.Example.FromString(
self._create_first_tf_example())]))
self.assert_sorted(output[0][1])
def test_add_context_fn(self):
sequence_key = 'image/seq_id'
add_context_features = True
image_ids_to_keep = 'All'
context_fn = add_context_to_examples.GenerateContextFn(
sequence_key, add_context_features, image_ids_to_keep)
output = context_fn.process(
('dummy_key', [tf.train.Example.FromString(
self._create_first_tf_example()),
tf.train.Example.FromString(
self._create_second_tf_example())]))
self.assertEqual(len(output), 2)
self.assert_context(output)
def test_add_context_fn_output_sequence_example(self):
sequence_key = 'image/seq_id'
add_context_features = True
image_ids_to_keep = 'All'
context_fn = add_context_to_examples.GenerateContextFn(
sequence_key, add_context_features, image_ids_to_keep,
output_type='tf_sequence_example')
output = context_fn.process(
('01',
[tf.train.Example.FromString(self._create_first_tf_example()),
tf.train.Example.FromString(self._create_second_tf_example())]))
self.assertEqual(len(output), 1)
self.assert_expected_sequence_example(output)
def test_add_context_fn_output_sequence_example_cliplen(self):
sequence_key = 'image/seq_id'
add_context_features = True
image_ids_to_keep = 'All'
context_fn = add_context_to_examples.GenerateContextFn(
sequence_key, add_context_features, image_ids_to_keep,
output_type='tf_sequence_example', max_clip_length=1)
output = context_fn.process(
('01',
[tf.train.Example.FromString(self._create_first_tf_example()),
tf.train.Example.FromString(self._create_second_tf_example())]))
self.assertEqual(len(output), 2)
def test_beam_pipeline(self):
with InMemoryTFRecord(
[self._create_first_tf_example(),
self._create_second_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
sequence_key = six.ensure_binary('image/seq_id')
max_num_elements = 10
num_shards = 1
pipeline = add_context_to_examples.construct_pipeline(
input_tfrecord,
output_tfrecord,
sequence_key,
max_num_elements_in_context_features=max_num_elements,
num_shards=num_shards)
runner.run(pipeline)
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), 2)
self.assert_expected_examples([tf.train.Example.FromString(
tf_example) for tf_example in actual_output])
def test_beam_pipeline_sequence_example(self):
with InMemoryTFRecord(
[self._create_first_tf_example(),
self._create_second_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
sequence_key = six.ensure_binary('image/seq_id')
max_num_elements = 10
num_shards = 1
pipeline = add_context_to_examples.construct_pipeline(
input_tfrecord,
output_tfrecord,
sequence_key,
max_num_elements_in_context_features=max_num_elements,
num_shards=num_shards,
output_type='tf_sequence_example')
runner.run(pipeline)
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(
path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), 1)
self.assert_expected_sequence_example(
[tf.train.SequenceExample.FromString(
tf_example) for tf_example in actual_output])
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Beam pipeline to create COCO Camera Traps Object Detection TFRecords.
Please note that this tool creates sharded output files.
This tool assumes the input annotations are in the COCO Camera Traps json
format, specified here:
https://github.com/Microsoft/CameraTraps/blob/master/data_management/README.md
Example usage:
python create_cococameratraps_tfexample_main.py \
--alsologtostderr \
--output_tfrecord_prefix="/path/to/output/tfrecord/location/prefix" \
--image_directory="/path/to/image/folder/" \
--input_annotations_file="path/to/annotations.json"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import json
import logging
import os
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import PIL.Image
import tensorflow.compat.v1 as tf
from apache_beam import runners
from object_detection.utils import dataset_util
flags.DEFINE_string('image_directory', None, 'Directory where images are '
'stored')
flags.DEFINE_string('output_tfrecord_prefix', None,
'TFRecord containing images in tf.Example format.')
flags.DEFINE_string('input_annotations_file', None, 'Path to Coco-CameraTraps '
                    'style annotations file')
flags.DEFINE_integer('num_images_per_shard',
200,
'The number of images to be stored in each shard.')
FLAGS = flags.FLAGS
class ParseImage(beam.DoFn):
"""A DoFn that parses a COCO-CameraTraps json and emits TFRecords."""
def __init__(self, image_directory, images, annotations, categories,
keep_bboxes):
"""Initialization function.
Args:
image_directory: Path to image directory
images: list of COCO Camera Traps style image dictionaries
annotations: list of COCO Camera Traps style annotation dictionaries
categories: list of COCO Camera Traps style category dictionaries
keep_bboxes: Whether to keep any bounding boxes that exist in the
annotations
"""
self._image_directory = image_directory
self._image_dict = {im['id']: im for im in images}
self._annotation_dict = {im['id']: [] for im in images}
self._category_dict = {int(cat['id']): cat for cat in categories}
for ann in annotations:
self._annotation_dict[ann['image_id']].append(ann)
self._images = images
self._keep_bboxes = keep_bboxes
self._num_examples_processed = beam.metrics.Metrics.counter(
'cococameratraps_data_generation', 'num_tf_examples_processed')
def process(self, image_id):
"""Builds a tf.Example given an image id.
Args:
image_id: the image id of the associated image
Returns:
List of tf.Examples.
"""
image = self._image_dict[image_id]
annotations = self._annotation_dict[image_id]
image_height = image['height']
image_width = image['width']
filename = image['file_name']
image_id = image['id']
image_location_id = image['location']
image_datetime = str(image['date_captured'])
image_sequence_id = str(image['seq_id'])
image_sequence_num_frames = int(image['seq_num_frames'])
image_sequence_frame_num = int(image['frame_num'])
full_path = os.path.join(self._image_directory, filename)
try:
# Ensure the image exists and is not corrupted
with tf.io.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
# Ensure the image can be read by tf
with tf.Graph().as_default():
image = tf.image.decode_jpeg(encoded_jpg, channels=3)
init_op = tf.initialize_all_tables()
with tf.Session() as sess:
sess.run(init_op)
sess.run(image)
except Exception as e: # pylint: disable=broad-except
# The image file is missing or corrupt
tf.logging.error(str(e))
return []
key = hashlib.sha256(encoded_jpg).hexdigest()
feature_dict = {
'image/height':
dataset_util.int64_feature(image_height),
'image/width':
dataset_util.int64_feature(image_width),
'image/filename':
dataset_util.bytes_feature(filename.encode('utf8')),
'image/source_id':
dataset_util.bytes_feature(str(image_id).encode('utf8')),
'image/key/sha256':
dataset_util.bytes_feature(key.encode('utf8')),
'image/encoded':
dataset_util.bytes_feature(encoded_jpg),
'image/format':
dataset_util.bytes_feature('jpeg'.encode('utf8')),
'image/location':
dataset_util.bytes_feature(str(image_location_id).encode('utf8')),
'image/seq_num_frames':
dataset_util.int64_feature(image_sequence_num_frames),
'image/seq_frame_num':
dataset_util.int64_feature(image_sequence_frame_num),
'image/seq_id':
dataset_util.bytes_feature(image_sequence_id.encode('utf8')),
'image/date_captured':
dataset_util.bytes_feature(image_datetime.encode('utf8'))
}
num_annotations_skipped = 0
if annotations:
xmin = []
xmax = []
ymin = []
ymax = []
category_names = []
category_ids = []
area = []
for object_annotations in annotations:
if 'bbox' in object_annotations and self._keep_bboxes:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
num_annotations_skipped += 1
continue
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
if 'area' in object_annotations:
area.append(object_annotations['area'])
else:
# approximate area using l*w/2
area.append(width*height/2.0)
category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
category_names.append(
self._category_dict[category_id]['name'].encode('utf8'))
feature_dict.update({
'image/object/bbox/xmin':
dataset_util.float_list_feature(xmin),
'image/object/bbox/xmax':
dataset_util.float_list_feature(xmax),
'image/object/bbox/ymin':
dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax':
dataset_util.float_list_feature(ymax),
'image/object/class/text':
dataset_util.bytes_list_feature(category_names),
'image/object/class/label':
dataset_util.int64_list_feature(category_ids),
'image/object/area':
dataset_util.float_list_feature(area),
})
# For classification, add the first category to image/class/label and
# image/class/text
if not category_ids:
feature_dict.update({
'image/class/label':
dataset_util.int64_list_feature([0]),
'image/class/text':
dataset_util.bytes_list_feature(['empty'.encode('utf8')]),
})
else:
feature_dict.update({
'image/class/label':
dataset_util.int64_list_feature([category_ids[0]]),
'image/class/text':
dataset_util.bytes_list_feature([category_names[0]]),
})
else:
# Add empty class if there are no annotations
feature_dict.update({
'image/class/label':
dataset_util.int64_list_feature([0]),
'image/class/text':
dataset_util.bytes_list_feature(['empty'.encode('utf8')]),
})
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
self._num_examples_processed.inc(1)
    return [example]
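# Illustrative sketch (paths and variables here are placeholders, not part of
# this tool): the DoFn can also be invoked directly, outside of a Beam
# pipeline, to build the tf.Examples for a single image id:
#
#   parser = ParseImage('/path/to/images', data['images'], data['annotations'],
#                       data['categories'], keep_bboxes=True)
#   examples = parser.process(data['images'][0]['id'])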
def _load_json_data(data_file):
with tf.io.gfile.GFile(data_file, 'r') as fid:
data_dict = json.load(fid)
return data_dict
def create_pipeline(image_directory,
input_annotations_file,
output_tfrecord_prefix=None,
num_images_per_shard=200,
keep_bboxes=True):
"""Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.
Args:
image_directory: Path to image directory
input_annotations_file: Path to a coco-cameratraps annotation file
output_tfrecord_prefix: Absolute path for tfrecord outputs. Final files will
be named {output_tfrecord_prefix}@N.
num_images_per_shard: The number of images to store in each shard
keep_bboxes: Whether to keep any bounding boxes that exist in the json file
Returns:
A Beam pipeline.
"""
logging.info('Reading data from COCO-CameraTraps Dataset.')
data = _load_json_data(input_annotations_file)
num_shards = int(np.ceil(float(len(data['images']))/num_images_per_shard))
def pipeline(root):
"""Builds beam pipeline."""
image_examples = (
root
| ('CreateCollections') >> beam.Create(
[im['id'] for im in data['images']])
| ('ParseImage') >> beam.ParDo(ParseImage(
image_directory, data['images'], data['annotations'],
data['categories'], keep_bboxes=keep_bboxes)))
_ = (image_examples
| ('Reshuffle') >> beam.Reshuffle()
| ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord_prefix,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example)))
return pipeline
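# Note: with the default shard naming of beam.io.tfrecordio.WriteToTFRecord
# used above, the output files are written as
# {output_tfrecord_prefix}-XXXXX-of-XXXXX, which is the pattern the unit tests
# glob for.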
def main(_):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
"""
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.output_tfrecord_prefix)
tf.io.gfile.makedirs(dirname)
runner.run(
create_pipeline(
image_directory=FLAGS.image_directory,
input_annotations_file=FLAGS.input_annotations_file,
output_tfrecord_prefix=FLAGS.output_tfrecord_prefix,
num_images_per_shard=FLAGS.num_images_per_shard))
if __name__ == '__main__':
flags.mark_flags_as_required([
'image_directory',
'input_annotations_file',
'output_tfrecord_prefix'
])
app.run(main)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for create_cococameratraps_tfexample_main."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import json
import os
import tempfile
import unittest
import numpy as np
from PIL import Image
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
from object_detection.utils import tf_version
from apache_beam import runners
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
IMAGE_HEIGHT = 360
IMAGE_WIDTH = 480
def _write_random_images_to_directory(self, directory, num_frames):
for frame_num in range(num_frames):
img = np.random.randint(0, high=256,
size=(self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 3),
dtype=np.uint8)
pil_image = Image.fromarray(img)
fname = 'im_' + str(frame_num) + '.jpg'
pil_image.save(os.path.join(directory, fname), 'JPEG')
def _create_json_file(self, directory, num_frames, keep_bboxes=False):
json_dict = {'images': [], 'annotations': []}
json_dict['categories'] = [{'id': 0, 'name': 'empty'},
{'id': 1, 'name': 'animal'}]
for idx in range(num_frames):
im = {'id': 'im_' + str(idx),
'file_name': 'im_' + str(idx) + '.jpg',
'height': self.IMAGE_HEIGHT,
'width': self.IMAGE_WIDTH,
'seq_id': 'seq_1',
'seq_num_frames': num_frames,
'frame_num': idx,
'location': 'loc_' + str(idx),
'date_captured': str(datetime.datetime.now())
}
json_dict['images'].append(im)
ann = {'id': 'ann' + str(idx),
'image_id': 'im_' + str(idx),
'category_id': 1,
}
if keep_bboxes:
ann['bbox'] = [0.0 * self.IMAGE_WIDTH,
0.1 * self.IMAGE_HEIGHT,
0.5 * self.IMAGE_WIDTH,
0.5 * self.IMAGE_HEIGHT]
json_dict['annotations'].append(ann)
json_path = os.path.join(directory, 'test_file.json')
with tf.io.gfile.GFile(json_path, 'w') as f:
json.dump(json_dict, f)
return json_path
def assert_expected_example_bbox(self, example):
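    # The json bbox above is [0, 0.1*H, 0.5*W, 0.5*H] in [x, y, width, height]
    # pixels, so after normalization by the image size we expect
    # xmin=0.0, ymin=0.1, xmax=0.5 and ymax=0.6.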
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.1])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.0])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.6])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.5])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, ['animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, ['animal'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value,
[self.IMAGE_HEIGHT])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value,
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
['im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def assert_expected_example(self, example):
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, ['animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, ['animal'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value,
[self.IMAGE_HEIGHT])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value,
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
['im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_beam_pipeline(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline = create_cococameratraps_tfexample_main.create_pipeline(
temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord)
runner.run(pipeline)
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), num_frames)
self.assert_expected_example(tf.train.Example.FromString(
actual_output[0]))
def test_beam_pipeline_bbox(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames, keep_bboxes=True)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline = create_cococameratraps_tfexample_main.create_pipeline(
temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord,
keep_bboxes=True)
runner.run(pipeline)
filenames = tf.io.gfile.glob(output_tfrecord+'-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), num_frames)
self.assert_expected_example_bbox(tf.train.Example.FromString(
actual_output[0]))
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""A Beam job to generate detection data for camera trap images.
This tool runs inference with an exported Object Detection model in
`saved_model` format and produces raw detection boxes on camera trap images in
tf.Examples, with the assumption that the bounding box class label will match
the image-level class label in the tf.Example.
Steps to generate a detection dataset:
1. Use object_detection/export_inference_graph.py to get a `saved_model` for
inference. The input node must accept a tf.Example proto.
2. Run this tool with `saved_model` from step 1 and a TFRecord of tf.Example
protos containing images for inference.
Example Usage:
--------------
python tensorflow_models/object_detection/export_inference_graph.py \
--alsologtostderr \
--input_type tf_example \
--pipeline_config_path path/to/detection_model.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
python generate_detection_data.py \
--alsologtostderr \
--input_tfrecord path/to/input_tfrecord@X \
--output_tfrecord path/to/output_tfrecord@X \
--model_dir path/to/exported_model_directory/saved_model
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import threading
from absl import app
from absl import flags
import apache_beam as beam
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('detection_input_tfrecord', None, 'TFRecord containing '
'images in tf.Example format for object detection.')
flags.DEFINE_string('detection_output_tfrecord', None,
'TFRecord containing detections in tf.Example format.')
flags.DEFINE_string('detection_model_dir', None,
                    'Path to directory containing an object detection '
                    'SavedModel.')
flags.DEFINE_float('confidence_threshold', 0.9,
'Min confidence to keep bounding boxes')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateDetectionDataFn(beam.DoFn):
"""Generates detection data for camera trap images.
This Beam DoFn performs inference with an object detection `saved_model` and
produces detection boxes for camera trap data, matched to the
object class.
"""
session_lock = threading.Lock()
def __init__(self, model_dir, confidence_threshold):
"""Initialization function.
Args:
      model_dir: A directory containing the saved model.
confidence_threshold: the confidence threshold for boxes to keep
"""
self._model_dir = model_dir
self._confidence_threshold = confidence_threshold
self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter(
'detection_data_generation', 'num_tf_examples_processed')
def start_bundle(self):
self._load_inference_model()
def _load_inference_model(self):
# Because initialization of the tf.Session is expensive we share
# one instance across all threads in the worker. This is possible since
# tf.Session.run() is thread safe.
with self.session_lock:
if self._session is None:
graph = tf.Graph()
self._session = tf.Session(graph=graph)
with graph.as_default():
meta_graph = tf.saved_model.loader.load(
self._session, [tf.saved_model.tag_constants.SERVING],
self._model_dir)
signature = meta_graph.signature_def['serving_default']
input_tensor_name = signature.inputs['inputs'].name
self._input = graph.get_tensor_by_name(input_tensor_name)
self._boxes_node = graph.get_tensor_by_name(
signature.outputs['detection_boxes'].name)
self._scores_node = graph.get_tensor_by_name(
signature.outputs['detection_scores'].name)
self._num_detections_node = graph.get_tensor_by_name(
signature.outputs['num_detections'].name)
def process(self, tfrecord_entry):
return self._run_inference_and_generate_detections(tfrecord_entry)
def _run_inference_and_generate_detections(self, tfrecord_entry):
input_example = tf.train.Example.FromString(tfrecord_entry)
if input_example.features.feature[
'image/object/bbox/ymin'].float_list.value:
# There are already ground truth boxes for this image, just keep them.
return [input_example]
detection_boxes, detection_scores, num_detections = self._session.run(
[self._boxes_node, self._scores_node, self._num_detections_node],
feed_dict={self._input: [tfrecord_entry]})
example = tf.train.Example()
num_detections = int(num_detections[0])
image_class_labels = input_example.features.feature[
'image/object/class/label'].int64_list.value
image_class_texts = input_example.features.feature[
'image/object/class/text'].bytes_list.value
# Ignore any images with multiple classes,
# we can't match the class to the box.
if len(image_class_labels) > 1:
return []
# Don't add boxes for images already labeled empty (for now)
if len(image_class_labels) == 1:
# Add boxes over confidence threshold.
for idx, score in enumerate(detection_scores[0]):
if score >= self._confidence_threshold and idx < num_detections:
example.features.feature[
'image/object/bbox/ymin'].float_list.value.extend([
detection_boxes[0, idx, 0]])
example.features.feature[
'image/object/bbox/xmin'].float_list.value.extend([
detection_boxes[0, idx, 1]])
example.features.feature[
'image/object/bbox/ymax'].float_list.value.extend([
detection_boxes[0, idx, 2]])
example.features.feature[
'image/object/bbox/xmax'].float_list.value.extend([
detection_boxes[0, idx, 3]])
# Add box scores and class texts and labels.
example.features.feature[
'image/object/class/score'].float_list.value.extend(
[score])
example.features.feature[
'image/object/class/label'].int64_list.value.extend(
[image_class_labels[0]])
example.features.feature[
'image/object/class/text'].bytes_list.value.extend(
[image_class_texts[0]])
# Add other essential example attributes
example.features.feature['image/encoded'].bytes_list.value.extend(
input_example.features.feature['image/encoded'].bytes_list.value)
example.features.feature['image/height'].int64_list.value.extend(
input_example.features.feature['image/height'].int64_list.value)
example.features.feature['image/width'].int64_list.value.extend(
input_example.features.feature['image/width'].int64_list.value)
example.features.feature['image/source_id'].bytes_list.value.extend(
input_example.features.feature['image/source_id'].bytes_list.value)
example.features.feature['image/location'].bytes_list.value.extend(
input_example.features.feature['image/location'].bytes_list.value)
example.features.feature['image/date_captured'].bytes_list.value.extend(
input_example.features.feature['image/date_captured'].bytes_list.value)
example.features.feature['image/class/text'].bytes_list.value.extend(
input_example.features.feature['image/class/text'].bytes_list.value)
example.features.feature['image/class/label'].int64_list.value.extend(
input_example.features.feature['image/class/label'].int64_list.value)
example.features.feature['image/seq_id'].bytes_list.value.extend(
input_example.features.feature['image/seq_id'].bytes_list.value)
example.features.feature['image/seq_num_frames'].int64_list.value.extend(
input_example.features.feature['image/seq_num_frames'].int64_list.value)
example.features.feature['image/seq_frame_num'].int64_list.value.extend(
input_example.features.feature['image/seq_frame_num'].int64_list.value)
self._num_examples_processed.inc(1)
return [example]
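# Illustrative sketch (the saved model path is a placeholder): the DoFn can be
# exercised directly, as the unit tests do, without constructing a pipeline:
#
#   fn = GenerateDetectionDataFn('/path/to/saved_model',
#                                confidence_threshold=0.9)
#   fn.start_bundle()
#   output_examples = fn.process(serialized_tf_example)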
def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
confidence_threshold, num_shards):
"""Returns a Beam pipeline to run object detection inference.
Args:
input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
model_dir: Path to `saved_model` to use for inference.
confidence_threshold: Threshold to use when keeping detection results.
num_shards: The number of output shards.
Returns:
pipeline: A Beam pipeline.
"""
def pipeline(root):
input_collection = (
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'RunInference' >> beam.ParDo(
GenerateDetectionDataFn(model_dir, confidence_threshold))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
return pipeline
def main(_):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
"""
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.detection_output_tfrecord)
tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.detection_input_tfrecord,
FLAGS.detection_output_tfrecord,
FLAGS.detection_model_dir,
FLAGS.confidence_threshold,
FLAGS.num_shards))
if __name__ == '__main__':
flags.mark_flags_as_required([
'detection_input_tfrecord',
'detection_output_tfrecord',
'detection_model_dir'
])
app.run(main)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for generate_detection_data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
from object_detection import exporter
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_detection_data
from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
mock = unittest.mock
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
def preprocess(self, inputs):
true_image_shapes = [] # Doesn't matter for the fake model.
return tf.identity(inputs), true_image_shapes
def predict(self, preprocessed_inputs, true_image_shapes):
return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)}
def postprocess(self, prediction_dict, true_image_shapes):
with tf.control_dependencies(prediction_dict.values()):
postprocessed_tensors = {
'detection_boxes': tf.constant([[[0.0, 0.1, 0.5, 0.6],
[0.5, 0.5, 0.8, 0.8]]], tf.float32),
'detection_scores': tf.constant([[0.95, 0.6]], tf.float32),
'detection_multiclass_scores': tf.constant([[[0.1, 0.7, 0.2],
[0.3, 0.1, 0.6]]],
tf.float32),
'detection_classes': tf.constant([[0, 1]], tf.float32),
'num_detections': tf.constant([2], tf.float32)
}
return postprocessed_tensors
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
def regularization_losses(self):
pass
def updates(self):
pass
@contextlib.contextmanager
def InMemoryTFRecord(entries):
temp = tempfile.NamedTemporaryFile(delete=False)
filename = temp.name
try:
with tf.python_io.TFRecordWriter(filename) as writer:
for value in entries:
writer.write(value)
yield filename
finally:
os.unlink(filename)
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class GenerateDetectionDataTest(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self, checkpoint_path):
"""A function to save checkpoint from a fake Detection Model.
Args:
checkpoint_path: Path to save checkpoint from Fake model.
"""
g = tf.Graph()
with g.as_default():
mock_model = FakeModel(num_classes=5)
preprocessed_inputs, true_image_shapes = mock_model.preprocess(
tf.placeholder(tf.float32, shape=[None, None, None, 3]))
predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
mock_model.postprocess(predictions, true_image_shapes)
tf.train.get_or_create_global_step()
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with self.test_session(graph=g) as sess:
sess.run(init)
saver.save(sess, checkpoint_path)
def _export_saved_model(self):
tmp_dir = self.get_temp_dir()
checkpoint_path = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path)
output_directory = os.path.join(tmp_dir, 'output')
saved_model_path = os.path.join(output_directory, 'saved_model')
tf.io.gfile.makedirs(output_directory)
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=5)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
outputs, placeholder_tensor = exporter.build_detection_graph(
input_type='tf_example',
detection_model=detection_model,
input_shape=None,
output_collection_name='inference_op',
graph_hook_fn=None)
output_node_names = ','.join(outputs.keys())
saver = tf.train.Saver()
input_saver_def = saver.as_saver_def()
frozen_graph_def = exporter.freeze_graph_with_def_protos(
input_graph_def=tf.get_default_graph().as_graph_def(),
input_saver_def=input_saver_def,
input_checkpoint=checkpoint_path,
output_node_names=output_node_names,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
output_graph='',
clear_devices=True,
initializer_nodes='')
exporter.write_saved_model(
saved_model_path=saved_model_path,
frozen_graph_def=frozen_graph_def,
inputs=placeholder_tensor,
outputs=outputs)
return saved_model_path
def _create_tf_example(self):
with self.test_session():
encoded_image = tf.image.encode_jpeg(
tf.constant(np.ones((4, 6, 3)).astype(np.uint8))).eval()
def BytesFeature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def Int64Feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': BytesFeature(encoded_image),
'image/source_id': BytesFeature(b'image_id'),
'image/height': Int64Feature(4),
'image/width': Int64Feature(6),
'image/object/class/label': Int64Feature(5),
'image/object/class/text': BytesFeature(b'hyena'),
'image/class/label': Int64Feature(5),
'image/class/text': BytesFeature(b'hyena'),
}))
return example.SerializeToString()
def assert_expected_example(self, example):
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.0])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.1])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.5])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.6])
self.assertAllClose(
example.features.feature['image/object/class/score']
.float_list.value, [0.95])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'hyena'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'hyena'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value, [4])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value, [6])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'image_id'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_generate_detection_data_fn(self):
saved_model_path = self._export_saved_model()
confidence_threshold = 0.8
inference_fn = generate_detection_data.GenerateDetectionDataFn(
saved_model_path, confidence_threshold)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/text']
.bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assertAllEqual(
output_example.features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(output_example.features.feature['image/width']
.int64_list.value, [6])
self.assert_expected_example(output_example)
def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model()
confidence_threshold = 0.8
num_shards = 1
pipeline = generate_detection_data.construct_pipeline(
input_tfrecord, output_tfrecord, saved_model_path,
confidence_threshold, num_shards)
runner.run(pipeline)
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), 1)
self.assert_expected_example(tf.train.Example.FromString(
actual_output[0]))
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""A Beam job to generate embedding data for camera trap images.
This tool runs inference with an exported Object Detection model in
`saved_model` format and produces raw embeddings for camera trap data. These
embeddings contain an object-centric feature embedding from Faster R-CNN, an
encoding of the datetime at which the image was taken (each field scaled to
roughly [0, 1]), and the position and size of the object of interest. By
default, only the highest-scoring object embedding is included.
Steps to generate an embedding dataset:
1. Use object_detection/export_inference_graph.py to get a Faster R-CNN
`saved_model` for inference. The input node must accept a tf.Example proto.
2. Run this tool with `saved_model` from step 1 and a TFRecord of tf.Example
protos containing images for inference.
Example Usage:
--------------
python tensorflow_models/object_detection/export_inference_graph.py \
--alsologtostderr \
--input_type tf_example \
--pipeline_config_path path/to/faster_rcnn_model.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
python generate_embedding_data.py \
--alsologtostderr \
--embedding_input_tfrecord path/to/input_tfrecords* \
--embedding_output_tfrecord path/to/output_tfrecords \
--embedding_model_dir path/to/exported_model_directory/saved_model
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import os
import threading
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('embedding_input_tfrecord', None, 'TFRecord containing '
                    'images in tf.Example format for object detection.')
flags.DEFINE_string('embedding_output_tfrecord', None,
'TFRecord containing embeddings in tf.Example format.')
flags.DEFINE_string('embedding_model_dir', None,
                    'Path to directory containing an object detection '
                    'SavedModel with detection_box_classifier_features in '
                    'the output.')
flags.DEFINE_integer('top_k_embedding_count', 1,
'The number of top k embeddings to add to the memory bank.'
)
flags.DEFINE_integer('bottom_k_embedding_count', 0,
'The number of bottom k embeddings to add to the memory '
'bank.')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateEmbeddingDataFn(beam.DoFn):
"""Generates embedding data for camera trap images.
This Beam DoFn performs inference with an object detection `saved_model` and
produces contextual embedding vectors.
"""
session_lock = threading.Lock()
def __init__(self, model_dir, top_k_embedding_count,
bottom_k_embedding_count):
"""Initialization function.
Args:
      model_dir: A directory containing the saved model.
top_k_embedding_count: the number of high-confidence embeddings to store
bottom_k_embedding_count: the number of low-confidence embeddings to store
"""
self._model_dir = model_dir
self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter(
'embedding_data_generation', 'num_tf_examples_processed')
self._top_k_embedding_count = top_k_embedding_count
self._bottom_k_embedding_count = bottom_k_embedding_count
def start_bundle(self):
self._load_inference_model()
def _load_inference_model(self):
# Because initialization of the tf.Session is expensive we share
# one instance across all threads in the worker. This is possible since
# tf.Session.run() is thread safe.
with self.session_lock:
if self._session is None:
graph = tf.Graph()
self._session = tf.Session(graph=graph)
with graph.as_default():
meta_graph = tf.saved_model.loader.load(
self._session, [tf.saved_model.tag_constants.SERVING],
self._model_dir)
signature = meta_graph.signature_def['serving_default']
input_tensor_name = signature.inputs['inputs'].name
detection_features_name = signature.outputs['detection_features'].name
detection_boxes_name = signature.outputs['detection_boxes'].name
num_detections_name = signature.outputs['num_detections'].name
self._input = graph.get_tensor_by_name(input_tensor_name)
self._embedding_node = graph.get_tensor_by_name(detection_features_name)
self._box_node = graph.get_tensor_by_name(detection_boxes_name)
self._scores_node = graph.get_tensor_by_name(
signature.outputs['detection_scores'].name)
self._num_detections = graph.get_tensor_by_name(num_detections_name)
tf.logging.info(signature.outputs['detection_features'].name)
tf.logging.info(signature.outputs['detection_boxes'].name)
tf.logging.info(signature.outputs['num_detections'].name)
def process(self, tfrecord_entry):
return self._run_inference_and_generate_embedding(tfrecord_entry)
def _run_inference_and_generate_embedding(self, tfrecord_entry):
input_example = tf.train.Example.FromString(tfrecord_entry)
# Convert date_captured datetime string to unix time integer and store
def get_date_captured(example):
date_captured = datetime.datetime.strptime(
six.ensure_str(
example.features.feature[
'image/date_captured'].bytes_list.value[0]),
'%Y-%m-%d %H:%M:%S')
return date_captured
try:
date_captured = get_date_captured(input_example)
except Exception: # pylint: disable=broad-except
# we require date_captured to be available for all images
return []
def embed_date_captured(date_captured):
"""Encodes the datetime of the image."""
embedded_date_captured = []
month_max = 12.0
day_max = 31.0
hour_max = 24.0
minute_max = 60.0
min_year = 1990.0
max_year = 2030.0
year = (date_captured.year-min_year)/float(max_year-min_year)
embedded_date_captured.append(year)
month = (date_captured.month-1)/month_max
embedded_date_captured.append(month)
day = (date_captured.day-1)/day_max
embedded_date_captured.append(day)
hour = date_captured.hour/hour_max
embedded_date_captured.append(hour)
minute = date_captured.minute/minute_max
embedded_date_captured.append(minute)
return np.asarray(embedded_date_captured)
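    # Worked example: '2019-10-20 12:30:00' maps to approximately
    # [(2019 - 1990) / 40, 9 / 12, 19 / 31, 12 / 24, 30 / 60]
    # = [0.725, 0.75, 0.613, 0.5, 0.5].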
def embed_position_and_size(box):
"""Encodes the bounding box of the object of interest."""
ymin = box[0]
xmin = box[1]
ymax = box[2]
xmax = box[3]
w = xmax - xmin
h = ymax - ymin
x = xmin + w / 2.0
y = ymin + h / 2.0
return np.asarray([x, y, w, h])
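    # For example, a box [ymin, xmin, ymax, xmax] = [0.0, 0.1, 0.5, 0.6]
    # becomes [center_x, center_y, width, height] = [0.35, 0.25, 0.5, 0.5].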
unix_time = (
(date_captured - datetime.datetime.fromtimestamp(0)).total_seconds())
example = tf.train.Example()
example.features.feature['image/unix_time'].float_list.value.extend(
[unix_time])
(detection_features, detection_boxes, num_detections,
detection_scores) = self._session.run(
[
self._embedding_node, self._box_node, self._num_detections[0],
self._scores_node
],
feed_dict={self._input: [tfrecord_entry]})
num_detections = int(num_detections)
embed_all = []
score_all = []
detection_features = np.asarray(detection_features)
def get_bb_embedding(detection_features, detection_boxes, detection_scores,
index):
embedding = detection_features[0][index]
pooled_embedding = np.mean(np.mean(embedding, axis=1), axis=0)
box = detection_boxes[0][index]
position_embedding = embed_position_and_size(box)
score = detection_scores[0][index]
return np.concatenate((pooled_embedding, position_embedding)), score
temporal_embedding = embed_date_captured(date_captured)
embedding_count = 0
for index in range(min(num_detections, self._top_k_embedding_count)):
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, index)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
embedding_count += 1
for index in range(
max(0, num_detections - 1),
max(-1, num_detections - 1 - self._bottom_k_embedding_count), -1):
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, index)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
embedding_count += 1
if embedding_count == 0:
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, 0)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
# Takes max in case embedding_count is 0.
embedding_length = len(embed_all) // max(1, embedding_count)
embed_all = np.asarray(embed_all)
example.features.feature['image/embedding'].float_list.value.extend(
embed_all)
example.features.feature['image/embedding_score'].float_list.value.extend(
score_all)
example.features.feature['image/embedding_length'].int64_list.value.append(
embedding_length)
example.features.feature['image/embedding_count'].int64_list.value.append(
embedding_count)
# Add other essential example attributes
example.features.feature['image/encoded'].bytes_list.value.extend(
input_example.features.feature['image/encoded'].bytes_list.value)
example.features.feature['image/height'].int64_list.value.extend(
input_example.features.feature['image/height'].int64_list.value)
example.features.feature['image/width'].int64_list.value.extend(
input_example.features.feature['image/width'].int64_list.value)
example.features.feature['image/source_id'].bytes_list.value.extend(
input_example.features.feature['image/source_id'].bytes_list.value)
example.features.feature['image/location'].bytes_list.value.extend(
input_example.features.feature['image/location'].bytes_list.value)
example.features.feature['image/date_captured'].bytes_list.value.extend(
input_example.features.feature['image/date_captured'].bytes_list.value)
example.features.feature['image/class/text'].bytes_list.value.extend(
input_example.features.feature['image/class/text'].bytes_list.value)
example.features.feature['image/class/label'].int64_list.value.extend(
input_example.features.feature['image/class/label'].int64_list.value)
example.features.feature['image/seq_id'].bytes_list.value.extend(
input_example.features.feature['image/seq_id'].bytes_list.value)
example.features.feature['image/seq_num_frames'].int64_list.value.extend(
input_example.features.feature['image/seq_num_frames'].int64_list.value)
example.features.feature['image/seq_frame_num'].int64_list.value.extend(
input_example.features.feature['image/seq_frame_num'].int64_list.value)
example.features.feature['image/object/bbox/ymax'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/ymax'].float_list.value)
example.features.feature['image/object/bbox/ymin'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/ymin'].float_list.value)
example.features.feature['image/object/bbox/xmax'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/xmax'].float_list.value)
example.features.feature['image/object/bbox/xmin'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/xmin'].float_list.value)
example.features.feature[
'image/object/class/score'].float_list.value.extend(
input_example.features.feature[
'image/object/class/score'].float_list.value)
example.features.feature[
'image/object/class/label'].int64_list.value.extend(
input_example.features.feature[
'image/object/class/label'].int64_list.value)
example.features.feature[
'image/object/class/text'].bytes_list.value.extend(
input_example.features.feature[
'image/object/class/text'].bytes_list.value)
self._num_examples_processed.inc(1)
return [example]
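# Illustrative sketch (the saved model path is a placeholder): running the DoFn
# directly on one serialized tf.Example yields an example whose
# 'image/embedding' feature holds embedding_count * embedding_length floats:
#
#   fn = GenerateEmbeddingDataFn('/path/to/saved_model',
#                                top_k_embedding_count=1,
#                                bottom_k_embedding_count=0)
#   fn.start_bundle()
#   embedded_example = fn.process(serialized_tf_example)[0]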
def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
top_k_embedding_count, bottom_k_embedding_count,
num_shards):
"""Returns a beam pipeline to run object detection inference.
Args:
    input_tfrecord: A TFRecord of tf.train.Example protos containing images.
    output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
model_dir: Path to `saved_model` to use for inference.
top_k_embedding_count: The number of high-confidence embeddings to store.
bottom_k_embedding_count: The number of low-confidence embeddings to store.
num_shards: The number of output shards.
"""
def pipeline(root):
input_collection = (
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
bottom_k_embedding_count))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
return pipeline
def main(_):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
"""
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.embedding_output_tfrecord)
tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.embedding_input_tfrecord,
FLAGS.embedding_output_tfrecord,
FLAGS.embedding_model_dir, FLAGS.top_k_embedding_count,
FLAGS.bottom_k_embedding_count, FLAGS.num_shards))
if __name__ == '__main__':
flags.mark_flags_as_required([
'embedding_input_tfrecord',
'embedding_output_tfrecord',
'embedding_model_dir'
])
app.run(main)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for generate_embedding_data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
from object_detection import exporter
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_embedding_data
from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
mock = unittest.mock
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
def preprocess(self, inputs):
true_image_shapes = [] # Doesn't matter for the fake model.
return tf.identity(inputs), true_image_shapes
def predict(self, preprocessed_inputs, true_image_shapes):
return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)}
def postprocess(self, prediction_dict, true_image_shapes):
with tf.control_dependencies(prediction_dict.values()):
num_features = 100
feature_dims = 10
classifier_feature = np.ones(
(2, feature_dims, feature_dims, num_features),
dtype=np.float32).tolist()
postprocessed_tensors = {
'detection_boxes': tf.constant([[[0.0, 0.1, 0.5, 0.6],
[0.5, 0.5, 0.8, 0.8]]], tf.float32),
'detection_scores': tf.constant([[0.95, 0.6]], tf.float32),
'detection_multiclass_scores': tf.constant([[[0.1, 0.7, 0.2],
[0.3, 0.1, 0.6]]],
tf.float32),
'detection_classes': tf.constant([[0, 1]], tf.float32),
'num_detections': tf.constant([2], tf.float32),
'detection_features':
tf.constant([classifier_feature],
tf.float32)
}
return postprocessed_tensors
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
def regularization_losses(self):
pass
def updates(self):
pass
@contextlib.contextmanager
def InMemoryTFRecord(entries):
temp = tempfile.NamedTemporaryFile(delete=False)
filename = temp.name
try:
with tf.python_io.TFRecordWriter(filename) as writer:
for value in entries:
writer.write(value)
yield filename
finally:
os.unlink(temp.name)
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class GenerateEmbeddingData(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self, checkpoint_path):
"""A function to save checkpoint from a fake Detection Model.
Args:
checkpoint_path: Path to save checkpoint from Fake model.
"""
g = tf.Graph()
with g.as_default():
mock_model = FakeModel(num_classes=5)
preprocessed_inputs, true_image_shapes = mock_model.preprocess(
tf.placeholder(tf.float32, shape=[None, None, None, 3]))
predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
mock_model.postprocess(predictions, true_image_shapes)
tf.train.get_or_create_global_step()
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with self.test_session(graph=g) as sess:
sess.run(init)
saver.save(sess, checkpoint_path)
def _export_saved_model(self):
tmp_dir = self.get_temp_dir()
checkpoint_path = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path)
output_directory = os.path.join(tmp_dir, 'output')
saved_model_path = os.path.join(output_directory, 'saved_model')
tf.io.gfile.makedirs(output_directory)
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=5)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
outputs, placeholder_tensor = exporter.build_detection_graph(
input_type='tf_example',
detection_model=detection_model,
input_shape=None,
output_collection_name='inference_op',
graph_hook_fn=None)
output_node_names = ','.join(outputs.keys())
saver = tf.train.Saver()
input_saver_def = saver.as_saver_def()
frozen_graph_def = exporter.freeze_graph_with_def_protos(
input_graph_def=tf.get_default_graph().as_graph_def(),
input_saver_def=input_saver_def,
input_checkpoint=checkpoint_path,
output_node_names=output_node_names,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
output_graph='',
clear_devices=True,
initializer_nodes='')
exporter.write_saved_model(
saved_model_path=saved_model_path,
frozen_graph_def=frozen_graph_def,
inputs=placeholder_tensor,
outputs=outputs)
return saved_model_path
def _create_tf_example(self):
with self.test_session():
encoded_image = tf.image.encode_jpeg(
tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).eval()
def BytesFeature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def Int64Feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def FloatFeature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': BytesFeature(encoded_image),
'image/source_id': BytesFeature(b'image_id'),
'image/height': Int64Feature(400),
'image/width': Int64Feature(600),
'image/class/label': Int64Feature(5),
'image/class/text': BytesFeature(b'hyena'),
'image/object/bbox/xmin': FloatFeature(0.1),
'image/object/bbox/xmax': FloatFeature(0.6),
'image/object/bbox/ymin': FloatFeature(0.0),
'image/object/bbox/ymax': FloatFeature(0.5),
'image/object/class/score': FloatFeature(0.95),
'image/object/class/label': Int64Feature(5),
'image/object/class/text': BytesFeature(b'hyena'),
'image/date_captured': BytesFeature(b'2019-10-20 12:12:12')
}))
return example.SerializeToString()
def assert_expected_example(self, example, topk=False, botk=False):
# Check embeddings
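    # With the FakeModel above, each stored box contributes 100 pooled feature
    # values + 4 position values + 5 date values = 109 floats, so two stored
    # boxes (top-k or bottom-k of 2) give 218.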
if topk or botk:
self.assertEqual(len(
example.features.feature['image/embedding'].float_list.value),
218)
self.assertAllEqual(
example.features.feature['image/embedding_count'].int64_list.value,
[2])
else:
self.assertEqual(len(
example.features.feature['image/embedding'].float_list.value),
109)
self.assertAllEqual(
example.features.feature['image/embedding_count'].int64_list.value,
[1])
self.assertAllEqual(
example.features.feature['image/embedding_length'].int64_list.value,
[109])
# Check annotations
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.0])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.1])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.5])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.6])
self.assertAllClose(
example.features.feature['image/object/class/score']
.float_list.value, [0.95])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/object/class/text']
        .bytes_list.value, [b'hyena'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/class/text']
        .bytes_list.value, [b'hyena'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value, [400])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value, [600])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
        [b'image_id'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_generate_embedding_data_fn(self):
saved_model_path = self._export_saved_model()
top_k_embedding_count = 1
bottom_k_embedding_count = 0
inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/text']
        .bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assert_expected_example(output_example)
def test_generate_embedding_data_with_top_k_boxes(self):
saved_model_path = self._export_saved_model()
top_k_embedding_count = 2
bottom_k_embedding_count = 0
inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/label'].int64_list.value, [5])
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/text'].bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assert_expected_example(output_example, topk=True)
def test_generate_embedding_data_with_bottom_k_boxes(self):
saved_model_path = self._export_saved_model()
top_k_embedding_count = 0
bottom_k_embedding_count = 2
inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/label'].int64_list.value, [5])
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
        .feature['image/object/class/text'].bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assert_expected_example(output_example, botk=True)
def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model()
top_k_embedding_count = 1
bottom_k_embedding_count = 0
num_shards = 1
pipeline = generate_embedding_data.construct_pipeline(
input_tfrecord, output_tfrecord, saved_model_path,
top_k_embedding_count, bottom_k_embedding_count, num_shards)
runner.run(pipeline)
filenames = tf.io.gfile.glob(
output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), 1)
self.assert_expected_example(tf.train.Example.FromString(
actual_output[0]))


if __name__ == '__main__':
tf.test.main()
......@@ -24,10 +24,18 @@ import six
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import seq_example_util
from object_detection.utils import tf_version


class SeqExampleUtilTest(tf.test.TestCase):
def materialize_tensors(self, list_of_tensors):
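    """Returns numpy values for the tensors: eagerly in TF2, via a session in TF1."""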
if tf_version.is_tf2():
return [tensor.numpy() for tensor in list_of_tensors]
else:
with self.cached_session() as sess:
return sess.run(list_of_tensors)

  def test_make_unlabeled_example(self):
num_frames = 5
image_height = 100
......@@ -41,8 +49,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
image_source_ids = [str(idx) for idx in range(num_frames)]
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
encoded_images = self.materialize_tensors(encoded_images_list)
seq_example = seq_example_util.make_sequence_example(
dataset_name=dataset_name,
video_id=video_id,
......@@ -109,8 +116,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
dtype=tf.int32), dtype=tf.uint8)
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
encoded_images = self.materialize_tensors(encoded_images_list)
timestamps = [100000, 110000]
is_annotated = [1, 0]
bboxes = [
......@@ -208,8 +214,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
dtype=tf.int32), dtype=tf.uint8)
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
encoded_images = self.materialize_tensors(encoded_images_list)
bboxes = [
np.array([[0., 0., 0.75, 0.75],
[0., 0., 1., 1.]], dtype=np.float32),
......
......@@ -52,6 +52,8 @@ EVAL_METRICS_CLASS_DICT = {
coco_evaluation.CocoKeypointEvaluator,
'coco_mask_metrics':
coco_evaluation.CocoMaskEvaluator,
'coco_panoptic_metrics':
coco_evaluation.CocoPanopticSegmentationEvaluator,
'oid_challenge_detection_metrics':
object_detection_evaluation.OpenImagesDetectionChallengeEvaluator,
'oid_challenge_segmentation_metrics':
......
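
# A minimal sketch, assuming the eval_pb2.EvalConfig proto already used in the
# tests below: the newly registered 'coco_panoptic_metrics' evaluator is
# requested by name through EvalConfig.metrics_set, like any other entry in
# EVAL_METRICS_CLASS_DICT.
from object_detection.protos import eval_pb2

panoptic_eval_config = eval_pb2.EvalConfig()
panoptic_eval_config.metrics_set.extend(['coco_panoptic_metrics'])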
......@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from absl.testing import parameterized
import numpy as np
......@@ -30,6 +31,7 @@ from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.protos import eval_pb2
from object_detection.utils import test_case
from object_detection.utils import tf_version


class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
......@@ -127,6 +129,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections(self, batch_size=1,
max_gt_boxes=None,
scale_to_absolute=False):
......@@ -155,6 +158,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections_and_masks(
self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
eval_config = eval_pb2.EvalConfig()
......@@ -185,6 +189,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections_and_resized_masks(
self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
eval_config = eval_pb2.EvalConfig()
......@@ -210,6 +215,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP'])
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_raises_error_with_unsupported_metric(self):
eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.extend(['unsupported_metric'])
......@@ -334,63 +340,67 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
dtype=np.float32)
detection_keypoints = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]],
dtype=np.float32)
detections = {
detection_fields.detection_boxes:
tf.constant(detection_boxes),
detection_fields.detection_scores:
tf.constant([[1.], [1.]]),
detection_fields.detection_classes:
tf.constant([[1], [2]]),
detection_fields.num_detections:
tf.constant([1, 1]),
detection_fields.detection_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
gt_boxes = detection_boxes
groundtruth = {
input_data_fields.groundtruth_boxes:
tf.constant(gt_boxes),
input_data_fields.groundtruth_classes:
tf.constant([[1.], [1.]]),
input_data_fields.groundtruth_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)
true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])
result = eval_util.result_dict_for_batched_example(
image, key, detections, groundtruth,
scale_to_absolute=True,
true_image_shapes=true_image_shapes,
original_image_spatial_shapes=original_image_spatial_shapes,
max_gt_boxes=tf.constant(1))
with self.test_session() as sess:
result = sess.run(result)
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
result[input_data_fields.groundtruth_boxes])
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [150., 150.], [300., 300.]]]],
result[input_data_fields.groundtruth_keypoints])
# Predictions from the model are not scaled.
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
result[detection_fields.detection_boxes])
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [75., 150.], [150., 300.]]]],
result[detection_fields.detection_keypoints])
def graph_fn():
detections = {
detection_fields.detection_boxes:
tf.constant(detection_boxes),
detection_fields.detection_scores:
tf.constant([[1.], [1.]]),
detection_fields.detection_classes:
tf.constant([[1], [2]]),
detection_fields.num_detections:
tf.constant([1, 1]),
detection_fields.detection_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
gt_boxes = detection_boxes
groundtruth = {
input_data_fields.groundtruth_boxes:
tf.constant(gt_boxes),
input_data_fields.groundtruth_classes:
tf.constant([[1.], [1.]]),
input_data_fields.groundtruth_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)
true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])
result = eval_util.result_dict_for_batched_example(
image, key, detections, groundtruth,
scale_to_absolute=True,
true_image_shapes=true_image_shapes,
original_image_spatial_shapes=original_image_spatial_shapes,
max_gt_boxes=tf.constant(1))
return (result[input_data_fields.groundtruth_boxes],
result[input_data_fields.groundtruth_keypoints],
result[detection_fields.detection_boxes],
result[detection_fields.detection_keypoints])
(gt_boxes, gt_keypoints, detection_boxes,
detection_keypoints) = self.execute_cpu(graph_fn, [])
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
gt_boxes)
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [150., 150.], [300., 300.]]]],
gt_keypoints)
# Predictions from the model are not scaled.
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
detection_boxes)
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [75., 150.], [150., 300.]]]],
detection_keypoints)


if __name__ == '__main__':
......
......@@ -134,6 +134,30 @@ flags.DEFINE_string('config_override', '',
'text proto to override pipeline_config_path.')
flags.DEFINE_boolean('write_inference_graph', False,
'If true, writes inference graph to disk.')
flags.DEFINE_string('additional_output_tensor_names', None,
                    'Additional Tensors to output, to be specified as a '
                    'comma-separated list of tensor names.')
flags.DEFINE_boolean('use_side_inputs', False,
'If True, uses side inputs as well as image inputs.')
flags.DEFINE_string('side_input_shapes', None,
'If use_side_inputs is True, this explicitly sets '
'the shape of the side input tensors to a fixed size. The '
'dimensions are to be provided as a comma-separated list '
'of integers. A value of -1 can be used for unknown '
'dimensions. A `/` denotes a break, starting the shape of '
'the next side input tensor. This flag is required if '
'using side inputs.')
flags.DEFINE_string('side_input_types', None,
'If use_side_inputs is True, this explicitly sets '
'the type of the side input tensors. The '
'dimensions are to be provided as a comma-separated list '
'of types, each of `string`, `integer`, or `float`. '
'This flag is required if using side inputs.')
flags.DEFINE_string('side_input_names', None,
'If use_side_inputs is True, this explicitly sets '
                    'the names of the side input tensors required by the '
                    'model. The names are to be provided as a comma-separated '
                    'list of strings. This flag is required if using side '
                    'inputs.')
tf.app.flags.mark_flag_as_required('pipeline_config_path')
tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
tf.app.flags.mark_flag_as_required('output_directory')
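
# A minimal sketch of the side-input flag format described above, using
# hypothetical flag values; the actual parsing is delegated to
# exporter.parse_side_inputs.
#   --side_input_names=image_scale,camera_id
#   --side_input_types=float,string
#   --side_input_shapes=1,-1/1   (a `/` separates the shape of each side
#                                 input; -1 marks an unknown dimension)
def _split_side_input_shapes(side_input_shapes_flag):
  """Splits a shapes flag such as '1,-1/1' into [[1, -1], [1]]."""
  return [[int(dim) for dim in shape.split(',')]
          for shape in side_input_shapes_flag.split('/')]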
......@@ -152,10 +176,30 @@ def main(_):
]
else:
input_shape = None
if FLAGS.use_side_inputs:
side_input_shapes, side_input_names, side_input_types = (
exporter.parse_side_inputs(
FLAGS.side_input_shapes,
FLAGS.side_input_names,
FLAGS.side_input_types))
else:
side_input_shapes = None
side_input_names = None
side_input_types = None
if FLAGS.additional_output_tensor_names:
additional_output_tensor_names = list(
FLAGS.additional_output_tensor_names.split(','))
else:
additional_output_tensor_names = None
exporter.export_inference_graph(
FLAGS.input_type, pipeline_config, FLAGS.trained_checkpoint_prefix,
FLAGS.output_directory, input_shape=input_shape,
write_inference_graph=FLAGS.write_inference_graph)
write_inference_graph=FLAGS.write_inference_graph,
additional_output_tensor_names=additional_output_tensor_names,
use_side_inputs=FLAGS.use_side_inputs,
side_input_shapes=side_input_shapes,
side_input_names=side_input_names,
side_input_types=side_input_types)


if __name__ == '__main__':
......
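
# An illustrative invocation with hypothetical values (script name and
# --input_type value assumed), showing how the new side-input flags combine
# with the existing required flags:
#
#   python export_inference_graph.py \
#     --input_type=image_tensor \
#     --pipeline_config_path=/path/to/pipeline.config \
#     --trained_checkpoint_prefix=/path/to/model.ckpt \
#     --output_directory=/path/to/exported_model \
#     --use_side_inputs=True \
#     --side_input_names=image_scale \
#     --side_input_types=float \
#     --side_input_shapes=1,-1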