Commit 7d1cfc1e authored by Yeqing Li's avatar Yeqing Li Committed by A. Unique TensorFlower

Adds files to utils folder.

PiperOrigin-RevId: 276317091
parent 638ba7a4
@@ -26,6 +26,75 @@ EPSILON = 1e-8
BBOX_XFORM_CLIP = np.log(1000. / 16.)
def yxyx_to_xywh(boxes):
"""Converts boxes from ymin, xmin, ymax, xmax to xmin, ymin, width, height.
Args:
boxes: a numpy array whose last dimension is 4 representing the coordinates
of boxes in ymin, xmin, ymax, xmax order.
Returns:
boxes: a numpy array with the same shape as the input `boxes`, in the new format.
Raises:
ValueError: If the last dimension of boxes is not 4.
"""
if boxes.shape[-1] != 4:
raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
boxes.shape[-1]))
boxes_ymin = boxes[..., 0]
boxes_xmin = boxes[..., 1]
boxes_width = boxes[..., 3] - boxes[..., 1]
boxes_height = boxes[..., 2] - boxes[..., 0]
new_boxes = np.stack([boxes_xmin, boxes_ymin, boxes_width, boxes_height],
axis=-1)
return new_boxes
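# Illustrative usage sketch (not part of this change), assuming `np` is the
# numpy module imported at the top of this file:
#
#   boxes = np.array([[10., 20., 50., 80.]])  # ymin, xmin, ymax, xmax
#   yxyx_to_xywh(boxes)                       # -> [[20., 10., 60., 40.]]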
def jitter_boxes(boxes, noise_scale=0.025):
"""Jitter the box coordinates by some noise distribution.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates of
boxes in ymin, xmin, ymax, xmax order.
noise_scale: a python float specifying the magnitude of the noise. A rule of
thumb is to set this within (0, 0.1]. The default value was found
empirically to best mimic noisy detections.
Returns:
jittered_boxes: a tensor whose shape is the same as `boxes` representing
the jittered boxes.
Raises:
ValueError: If the last dimension of boxes is not 4.
"""
if boxes.shape[-1] != 4:
raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
boxes.shape[-1]))
with tf.name_scope('jitter_boxes'):
bbox_jitters = tf.random.normal(boxes.get_shape(), stddev=noise_scale)
ymin = boxes[..., 0:1]
xmin = boxes[..., 1:2]
ymax = boxes[..., 2:3]
xmax = boxes[..., 3:4]
width = xmax - xmin
height = ymax - ymin
new_center_x = (xmin + xmax) / 2.0 + bbox_jitters[..., 0:1] * width
new_center_y = (ymin + ymax) / 2.0 + bbox_jitters[..., 1:2] * height
new_width = width * tf.math.exp(bbox_jitters[..., 2:3])
new_height = height * tf.math.exp(bbox_jitters[..., 3:4])
jittered_boxes = tf.concat([
new_center_y - new_height * 0.5, new_center_x - new_width * 0.5,
new_center_y + new_height * 0.5, new_center_x + new_width * 0.5
],
axis=-1)
return jittered_boxes
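# Illustrative usage sketch (not part of this change), assuming `tf` is
# TensorFlow 2.x as imported at the top of this file:
#
#   boxes = tf.constant([[[10., 20., 50., 80.], [0., 0., 30., 30.]]])
#   jittered = jitter_boxes(boxes, noise_scale=0.025)
#   # `jittered` has the same shape as `boxes`; each box is randomly shifted
#   # and rescaled around its own center.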
def normalize_boxes(boxes, image_shape):
"""Converts boxes to the normalized coordinates.
@@ -44,8 +113,8 @@ def normalize_boxes(boxes, image_shape):
ValueError: If the last dimension of boxes is not 4.
"""
if boxes.shape[-1] != 4:
raise ValueError(
'boxes.shape[1] is {:d}, but must be 4.'.format(boxes.shape[1]))
raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
boxes.shape[-1]))
with tf.name_scope('normalize_boxes'):
if isinstance(image_shape, list) or isinstance(image_shape, tuple):
@@ -86,13 +155,13 @@ def denormalize_boxes(boxes, image_shape):
height, width = image_shape
else:
image_shape = tf.cast(image_shape, dtype=boxes.dtype)
height = image_shape[..., 0:1]
width = image_shape[..., 1:2]
height, width = tf.split(image_shape, 2, axis=-1)
ymin = boxes[..., 0:1] * height
xmin = boxes[..., 1:2] * width
ymax = boxes[..., 2:3] * height
xmax = boxes[..., 3:4] * width
ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
ymin = ymin * height
xmin = xmin * width
ymax = ymax * height
xmax = xmax * width
denormalized_boxes = tf.concat([ymin, xmin, ymax, xmax], axis=-1)
return denormalized_boxes
@@ -116,10 +185,10 @@ def clip_boxes(boxes, image_shape):
ValueError: If the last dimension of boxes is not 4.
"""
if boxes.shape[-1] != 4:
raise ValueError(
'boxes.shape[1] is {:d}, but must be 4.'.format(boxes.shape[1]))
raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
boxes.shape[-1]))
with tf.name_scope('crop_boxes'):
with tf.name_scope('clip_boxes'):
if isinstance(image_shape, list) or isinstance(image_shape, tuple):
height, width = image_shape
else:
@@ -132,10 +201,10 @@ def clip_boxes(boxes, image_shape):
ymax = boxes[..., 2:3]
xmax = boxes[..., 3:4]
clipped_ymin = tf.maximum(tf.minimum(ymin, height - 1.0), 0.0)
clipped_ymax = tf.maximum(tf.minimum(ymax, height - 1.0), 0.0)
clipped_xmin = tf.maximum(tf.minimum(xmin, width - 1.0), 0.0)
clipped_xmax = tf.maximum(tf.minimum(xmax, width - 1.0), 0.0)
clipped_ymin = tf.math.maximum(tf.math.minimum(ymin, height - 1.0), 0.0)
clipped_ymax = tf.math.maximum(tf.math.minimum(ymax, height - 1.0), 0.0)
clipped_xmin = tf.math.maximum(tf.math.minimum(xmin, width - 1.0), 0.0)
clipped_xmax = tf.math.maximum(tf.math.minimum(xmax, width - 1.0), 0.0)
clipped_boxes = tf.concat(
[clipped_ymin, clipped_xmin, clipped_ymax, clipped_xmax],
@@ -143,14 +212,47 @@ def clip_boxes(boxes, image_shape):
return clipped_boxes
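# Illustrative usage sketch (not part of this change): clipping to a 480x640
# image keeps coordinates within [0, height - 1] x [0, width - 1]:
#
#   boxes = tf.constant([[-10., -10., 500., 700.]])
#   clip_boxes(boxes, [480, 640])   # -> [[0., 0., 479., 639.]]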
def compute_outer_boxes(boxes, image_shape, scale=1.0):
"""Computes the outer boxes that enclose the objects with a margin.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates of
boxes in ymin, xmin, ymax, xmax order.
image_shape: a list of two integers, a two-element vector or a tensor such
that all but the last dimensions are `broadcastable` to `boxes`. The last
dimension is 2, which represents [height, width].
scale: a float number specifying the scale of output outer boxes to input
`boxes`.
Returns:
outer_boxes: a tensor whose shape is the same as `boxes` representing the
outer boxes.
Raises:
ValueError: If `scale` is less than 1.0.
"""
if scale < 1.0:
raise ValueError(
'scale is {}, but outer box scale must be no less than 1.0.'.format(
scale))
centers_y = (boxes[..., 0] + boxes[..., 2]) / 2.0
centers_x = (boxes[..., 1] + boxes[..., 3]) / 2.0
box_height = (boxes[..., 2] - boxes[..., 0]) * scale
box_width = (boxes[..., 3] - boxes[..., 1]) * scale
outer_boxes = tf.stack([
centers_y - box_height / 2.0, centers_x - box_width / 2.0,
centers_y + box_height / 2.0, centers_x + box_width / 2.0
],
axis=1)
outer_boxes = clip_boxes(outer_boxes, image_shape)
return outer_boxes
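# Illustrative usage sketch (not part of this change): a 40x40 box grown by
# scale=1.5 becomes a 60x60 box around the same center, then gets clipped to
# the image:
#
#   boxes = tf.constant([[10., 10., 50., 50.]])
#   compute_outer_boxes(boxes, [100, 100], scale=1.5)  # -> [[0., 0., 60., 60.]]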
def encode_boxes(boxes, anchors, weights=None):
"""Encode boxes to targets.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
of boxes in ymin, xmin, ymax, xmax order.
anchors: a tensor whose shape is the same as `boxes` representing the
coordinates of anchors in ymin, xmin, ymax, xmax order.
anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
weights: None or a list of four float numbers used to scale coordinates.
Returns:
@@ -161,8 +263,8 @@ def encode_boxes(boxes, anchors, weights=None):
ValueError: If the last dimension of boxes is not 4.
"""
if boxes.shape[-1] != 4:
raise ValueError(
'boxes.shape[1] is {:d}, but must be 4.'.format(boxes.shape[1]))
raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
boxes.shape[-1]))
with tf.name_scope('encode_boxes'):
boxes = tf.cast(boxes, dtype=anchors.dtype)
@@ -206,14 +308,18 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
Args:
encoded_boxes: a tensor whose last dimension is 4 representing the
coordinates of encoded boxes in ymin, xmin, ymax, xmax order.
anchors: a tensor whose shape is the same as `boxes` representing the
coordinates of anchors in ymin, xmin, ymax, xmax order.
anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
weights: None or a list of four float numbers used to scale coordinates.
Returns:
decoded_boxes: a tensor whose shape is the same as `encoded_boxes`
representing the decoded boxes.
"""
if encoded_boxes.shape[-1] != 4:
raise ValueError('encoded_boxes.shape[-1] is {:d}, but must be 4.'.format(
encoded_boxes.shape[-1]))
with tf.name_scope('decode_boxes'):
encoded_boxes = tf.cast(encoded_boxes, dtype=anchors.dtype)
dy = encoded_boxes[..., 0:1]
@@ -225,8 +331,8 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
dx /= weights[1]
dh /= weights[2]
dw /= weights[3]
dh = tf.minimum(dh, BBOX_XFORM_CLIP)
dw = tf.minimum(dw, BBOX_XFORM_CLIP)
dh = tf.math.minimum(dh, BBOX_XFORM_CLIP)
dw = tf.math.minimum(dw, BBOX_XFORM_CLIP)
anchor_ymin = anchors[..., 0:1]
anchor_xmin = anchors[..., 1:2]
@@ -239,8 +345,8 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
decoded_boxes_yc = dy * anchor_h + anchor_yc
decoded_boxes_xc = dx * anchor_w + anchor_xc
decoded_boxes_h = tf.exp(dh) * anchor_h
decoded_boxes_w = tf.exp(dw) * anchor_w
decoded_boxes_h = tf.math.exp(dh) * anchor_h
decoded_boxes_w = tf.math.exp(dw) * anchor_w
decoded_boxes_ymin = decoded_boxes_yc - 0.5 * decoded_boxes_h
decoded_boxes_xmin = decoded_boxes_xc - 0.5 * decoded_boxes_w
@@ -252,3 +358,178 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
decoded_boxes_ymax, decoded_boxes_xmax],
axis=-1)
return decoded_boxes
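# Illustrative usage sketch (not part of this change): with the same
# `weights`, `decode_boxes` inverts `encode_boxes`, so a round trip
# approximately recovers the input boxes:
#
#   boxes = tf.constant([[[12., 14., 40., 42.]]])
#   anchors = tf.constant([[[10., 10., 50., 50.]]])
#   decoded = decode_boxes(encode_boxes(boxes, anchors), anchors)  # ~= boxes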
def filter_boxes(boxes, scores, image_shape, min_size_threshold):
"""Filter and remove boxes that are too small or fall outside the image.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates of
boxes in ymin, xmin, ymax, xmax order.
scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
representing the original scores of the boxes.
image_shape: a tensor whose shape is the same as, or `broadcastable` to
`boxes` except the last dimension, which is 2, representing [height,
width] of the scaled image.
min_size_threshold: a float representing the minimal box size in each side
(w.r.t. the scaled image). Boxes whose sides are smaller than it will be
filtered out.
Returns:
filtered_boxes: a tensor whose shape is the same as `boxes` but with
the positions of the filtered boxes filled with 0.
filtered_scores: a tensor whose shape is the same as `scores` but with
the positions of the filtered boxes filled with 0.
"""
if boxes.shape[-1] != 4:
raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
boxes.shape[-1]))
with tf.name_scope('filter_boxes'):
if isinstance(image_shape, list) or isinstance(image_shape, tuple):
height, width = image_shape
else:
image_shape = tf.cast(image_shape, dtype=boxes.dtype)
height = image_shape[..., 0]
width = image_shape[..., 1]
ymin = boxes[..., 0]
xmin = boxes[..., 1]
ymax = boxes[..., 2]
xmax = boxes[..., 3]
h = ymax - ymin + 1.0
w = xmax - xmin + 1.0
yc = ymin + 0.5 * h
xc = xmin + 0.5 * w
min_size = tf.cast(
tf.math.maximum(min_size_threshold, 1.0), dtype=boxes.dtype)
filtered_size_mask = tf.math.logical_and(
tf.math.greater(h, min_size), tf.math.greater(w, min_size))
filtered_center_mask = tf.logical_and(
tf.math.logical_and(tf.math.greater(yc, 0.0), tf.math.less(yc, height)),
tf.math.logical_and(tf.math.greater(xc, 0.0), tf.math.less(xc, width)))
filtered_mask = tf.math.logical_and(filtered_size_mask,
filtered_center_mask)
filtered_scores = tf.where(filtered_mask, scores, tf.zeros_like(scores))
filtered_boxes = tf.cast(
tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes
return filtered_boxes, filtered_scores
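# Illustrative usage sketch (not part of this change): the 2x2 box below is
# zeroed out in both outputs because its sides are under the 5-pixel minimum:
#
#   boxes = tf.constant([[[10., 10., 50., 50.], [0., 0., 1., 1.]]])
#   scores = tf.constant([[0.9, 0.8]])
#   filter_boxes(boxes, scores, [100, 100], min_size_threshold=5.0)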
def filter_boxes_by_scores(boxes, scores, min_score_threshold):
"""Filter and remove boxes whose scores are smaller than the threshold.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates of
boxes in ymin, xmin, ymax, xmax order.
scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
representing the original scores of the boxes.
min_score_threshold: a float representing the minimal box score threshold.
Boxes whose score are smaller than it will be filtered out.
Returns:
filtered_boxes: a tensor whose shape is the same as `boxes` but with
the positions of the filtered boxes filled with 0.
filtered_scores: a tensor whose shape is the same as `scores` but with
the positions of the filtered boxes filled with 0.
if boxes.shape[-1] != 4:
raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
boxes.shape[-1]))
with tf.name_scope('filter_boxes_by_scores'):
filtered_mask = tf.math.greater(scores, min_score_threshold)
filtered_scores = tf.where(filtered_mask, scores, tf.zeros_like(scores))
filtered_boxes = tf.cast(
tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes
return filtered_boxes, filtered_scores
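# Illustrative usage sketch (not part of this change): the second box and its
# score are zeroed out because 0.05 falls below the threshold:
#
#   boxes = tf.constant([[[10., 10., 50., 50.], [0., 0., 20., 20.]]])
#   scores = tf.constant([[0.9, 0.05]])
#   filter_boxes_by_scores(boxes, scores, min_score_threshold=0.1)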
def top_k_boxes(boxes, scores, k):
"""Sort and select top k boxes according to the scores.
Args:
boxes: a tensor of shape [batch_size, N, 4] representing the coordinates of
the boxes. N is the number of boxes per image.
scores: a tensor of shape [batch_size, N] representing the scores of the
boxes.
k: an integer or a tensor indicating the top k number.
Returns:
selected_boxes: a tensor of shape [batch_size, k, 4] representing the
selected top k box coordinates.
selected_scores: a tensor of shape [batch_size, k] representing the selected
top k box scores.
"""
with tf.name_scope('top_k_boxes'):
selected_scores, top_k_indices = tf.nn.top_k(scores, k=k, sorted=True)
batch_size, _ = scores.get_shape().as_list()
if batch_size == 1:
selected_boxes = tf.squeeze(
tf.gather(boxes, top_k_indices, axis=1), axis=1)
else:
top_k_indices_shape = tf.shape(top_k_indices)
batch_indices = (
tf.expand_dims(tf.range(top_k_indices_shape[0]), axis=-1) *
tf.ones([1, top_k_indices_shape[-1]], dtype=tf.int32))
gather_nd_indices = tf.stack([batch_indices, top_k_indices], axis=-1)
selected_boxes = tf.gather_nd(boxes, gather_nd_indices)
return selected_boxes, selected_scores
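# Illustrative usage sketch (not part of this change): selecting the 10
# highest-scoring boxes per image from a batch of 100 candidates:
#
#   boxes = tf.random.uniform([2, 100, 4])
#   scores = tf.random.uniform([2, 100])
#   top_boxes, top_scores = top_k_boxes(boxes, scores, k=10)
#   # top_boxes: [2, 10, 4]; top_scores: [2, 10], sorted in descending order.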
def bbox_overlap(boxes, gt_boxes):
"""Calculates the overlap between proposal and ground truth boxes.
Some `gt_boxes` may have been padded. The returned `iou` tensor for these
boxes will be -1.
Args:
boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
last dimension is the pixel coordinates in [ymin, xmin, ymax, xmax] form.
gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4]. This
tensor might have paddings with a negative value.
Returns:
iou: a tensor with a shape of [batch_size, N, MAX_NUM_INSTANCES].
"""
with tf.name_scope('bbox_overlap'):
bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
value=boxes, num_or_size_splits=4, axis=2)
gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(
value=gt_boxes, num_or_size_splits=4, axis=2)
# Calculates the intersection area.
i_xmin = tf.math.maximum(bb_x_min, tf.transpose(gt_x_min, [0, 2, 1]))
i_xmax = tf.math.minimum(bb_x_max, tf.transpose(gt_x_max, [0, 2, 1]))
i_ymin = tf.math.maximum(bb_y_min, tf.transpose(gt_y_min, [0, 2, 1]))
i_ymax = tf.math.minimum(bb_y_max, tf.transpose(gt_y_max, [0, 2, 1]))
i_area = tf.math.maximum((i_xmax - i_xmin), 0) * tf.math.maximum(
(i_ymax - i_ymin), 0)
# Calculates the union area.
bb_area = (bb_y_max - bb_y_min) * (bb_x_max - bb_x_min)
gt_area = (gt_y_max - gt_y_min) * (gt_x_max - gt_x_min)
# Adds a small epsilon to avoid divide-by-zero.
u_area = bb_area + tf.transpose(gt_area, [0, 2, 1]) - i_area + 1e-8
# Calculates IoU.
iou = i_area / u_area
# Fills -1 for IoU entries that involve the padded groundtruth boxes.
gt_invalid_mask = tf.less(
tf.reduce_max(gt_boxes, axis=-1, keepdims=True), 0.0)
padding_mask = tf.logical_or(
tf.zeros_like(bb_x_min, dtype=tf.bool),
tf.transpose(gt_invalid_mask, [0, 2, 1]))
iou = tf.where(padding_mask, -tf.ones_like(iou), iou)
return iou
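# Illustrative usage sketch (not part of this change): IoU against one real
# and one padded (all -1) groundtruth box:
#
#   boxes = tf.constant([[[0., 0., 10., 10.], [5., 5., 15., 15.]]])
#   gt_boxes = tf.constant([[[0., 0., 10., 10.], [-1., -1., -1., -1.]]])
#   bbox_overlap(boxes, gt_boxes)
#   # -> approx [[[1.0, -1.0], [0.14, -1.0]]]; the padded column is -1.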
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for handling dataset object categories."""
def coco_split_class_ids(split_name):
"""Returns the COCO class split ids for the given split name.
Args:
split_name: The name of dataset split.
Returns:
class_ids: a python list of integers.
"""
if split_name == 'all':
return []
elif split_name == 'voc':
return [
1, 2, 3, 4, 5, 6, 7, 9, 16, 17, 18, 19, 20, 21, 44, 62, 63, 64, 67, 72
]
elif split_name == 'nonvoc':
return [
8, 10, 11, 13, 14, 15, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
57, 58, 59, 60, 61, 65, 70, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84,
85, 86, 87, 88, 89, 90
]
else:
raise ValueError('Invalid split name {}!!!'.format(split_name))
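# Illustrative usage sketch (not part of this change):
#
#   coco_split_class_ids('voc')     # the 20 COCO ids that overlap PASCAL VOC
#   coco_split_class_ids('nonvoc')  # the remaining 60 COCO class ids
#   coco_split_class_ids('all')     # [] (no class restriction)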
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for dataloader."""
import tensorflow.compat.v2 as tf
from official.vision.detection.utils import input_utils
def process_source_id(source_id):
"""Processes source_id to the right format."""
if source_id.dtype == tf.string:
source_id = tf.cast(tf.strings.to_number(source_id), tf.int64)
with tf.control_dependencies([source_id]):
source_id = tf.cond(
pred=tf.equal(tf.size(input=source_id), 0),
true_fn=lambda: tf.cast(tf.constant(-1), tf.int64),
false_fn=lambda: tf.identity(source_id))
return source_id
def pad_groundtruths_to_fixed_size(gt, n):
"""Pads the first dimension of groundtruths labels to the fixed size."""
gt['boxes'] = input_utils.pad_to_fixed_size(gt['boxes'], n, -1)
gt['is_crowds'] = input_utils.pad_to_fixed_size(gt['is_crowds'], n, 0)
gt['areas'] = input_utils.pad_to_fixed_size(gt['areas'], n, -1)
gt['classes'] = input_utils.pad_to_fixed_size(gt['classes'], n, -1)
return gt
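# Illustrative usage sketch (not part of this change), assuming `tf` is
# TensorFlow 2.x as imported above:
#
#   source_id = process_source_id(tf.constant('12345'))  # -> int64 tensor 12345
#
# `pad_groundtruths_to_fixed_size` pads `boxes`, `is_crowds`, `areas` and
# `classes` along their first dimension to exactly `n` rows, using -1 (or 0
# for `is_crowds`) as the fill value.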
@@ -182,6 +182,109 @@ def resize_and_crop_image(image,
scaled_image = tf.image.resize(
image, tf.cast(scaled_size, tf.int32), method=method)
if random_jittering:
scaled_image = scaled_image[offset[0]:offset[0] + desired_size[0],
offset[1]:offset[1] + desired_size[1], :]
output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0,
padded_size[0], padded_size[1])
image_info = tf.stack(
[image_size, scaled_size, image_scale,
tf.cast(offset, tf.float32)])
return output_image, image_info
def resize_and_crop_image_v2(image,
short_side,
long_side,
padded_size,
aug_scale_min=1.0,
aug_scale_max=1.0,
seed=1,
method=tf.image.ResizeMethod.BILINEAR):
"""Resizes the input image to output size (Faster R-CNN style).
Resizes and pads images given the specified short / long side lengths and the
padded output size.
Here are the preprocessing steps.
1. For a given image, keep its aspect ratio and first try to rescale the short
side of the original image to `short_side`.
2. If the scaled image after step 1 has a long side that exceeds `long_side`,
keep the aspect ratio and rescale the long side of the image to `long_side`
instead.
3. Pad the rescaled image to `padded_size`.
Args:
image: a `Tensor` of shape [height, width, 3] representing an image.
short_side: a scalar `Tensor` or `int` representing the desired short side
to be rescaled to.
long_side: a scalar `Tensor` or `int` representing the desired long side to
be rescaled to.
padded_size: a `Tensor` or `int` list/tuple of two elements representing
[height, width] of the padded output image size. Padding will be applied
after scaling the image to the desired_size.
aug_scale_min: a `float` with range between [0, 1.0] representing minimum
random scale applied to desired_size for training scale jittering.
aug_scale_max: a `float` with range between [1.0, inf] representing maximum
random scale applied to desired_size for training scale jittering.
seed: seed for random scale jittering.
method: function to resize input image to scaled image.
Returns:
output_image: `Tensor` of shape [height, width, 3] where [height, width]
equals `padded_size`.
image_info: a 2D `Tensor` that encodes the information of the image and the
applied preprocessing. It is in the format of
[[original_height, original_width], [scaled_height, scaled_width],
[y_scale, x_scale], [y_offset, x_offset]], where [scaled_height,
scaled_width] is the actual scaled image size, and [y_scale, x_scale] is
the scaling factor, which is the ratio of
scaled dimension / original dimension.
"""
with tf.name_scope('resize_and_crop_image_v2'):
image_size = tf.cast(tf.shape(image)[0:2], tf.float32)
scale_using_short_side = (
short_side / tf.math.minimum(image_size[0], image_size[1]))
scale_using_long_side = (
long_side / tf.math.maximum(image_size[0], image_size[1]))
scaled_size = tf.math.round(image_size * scale_using_short_side)
scaled_size = tf.where(
tf.math.greater(
tf.math.maximum(scaled_size[0], scaled_size[1]), long_side),
tf.math.round(image_size * scale_using_long_side), scaled_size)
desired_size = scaled_size
random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0)
if random_jittering:
random_scale = tf.random.uniform([],
aug_scale_min,
aug_scale_max,
seed=seed)
scaled_size = tf.math.round(random_scale * scaled_size)
# Computes 2D image_scale.
image_scale = scaled_size / image_size
# Selects non-zero random offset (x, y) if scaled image is larger than
# desired_size.
if random_jittering:
max_offset = scaled_size - desired_size
max_offset = tf.where(
tf.math.less(max_offset, 0), tf.zeros_like(max_offset), max_offset)
offset = max_offset * tf.random.uniform([
2,
], 0, 1, seed=seed)
offset = tf.cast(offset, tf.int32)
else:
offset = tf.zeros((2,), tf.int32)
scaled_image = tf.image.resize(
image, tf.cast(scaled_size, tf.int32), method=method)
if random_jittering:
scaled_image = scaled_image[
offset[0]:offset[0] + desired_size[0],
......
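# Worked example of the scaling rule above (illustrative, not part of this
# change), with short_side=800 and long_side=1333:
#   * a 600x400 image is scaled by 800/400 = 2.0 to 1200x800; its long side
#     (1200) does not exceed 1333, so that size is kept;
#   * a 500x2000 image scaled by 800/500 would become 800x3200, whose long
#     side exceeds 1333, so the long-side scale 1333/2000 is used instead,
#     giving a ~333x1333 image before padding.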
@@ -100,7 +100,9 @@ def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'FlipHorizontal'):
if not scope:
scope = 'FlipHorizontal'
with tf.name_scope(scope):
keypoints = tf.transpose(a=keypoints, perm=[1, 0, 2])
keypoints = tf.gather(keypoints, flip_permutation)
v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
@@ -110,6 +112,70 @@ def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
return new_keypoints
def keypoint_change_coordinate_frame(keypoints, window, scope=None):
"""Changes coordinate frame of the keypoints to be relative to window's frame.
Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint
coordinates from keypoints of shape [num_instances, num_keypoints, 2]
to be relative to this window.
An example use case is data augmentation, where we are given groundtruth
keypoints and would like to randomly crop the image to some window. In this
case we need to change the coordinate frame of each groundtruth keypoint to be
relative to this new window.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
window we should change the coordinate frame to.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
if not scope:
scope = 'ChangeCoordinateFrame'
with tf.name_scope(scope):
win_height = window[2] - window[0]
win_width = window[3] - window[1]
new_keypoints = box_list_ops.scale(keypoints - [window[0], window[1]],
1.0 / win_height, 1.0 / win_width)
return new_keypoints
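# Illustrative usage sketch (not part of this change): two keypoints expressed
# relative to a 100x200 window anchored at (10, 10); expected result per the
# docstring above:
#
#   keypoints = tf.constant([[[20., 30.], [40., 50.]]])   # (y, x) pairs
#   window = tf.constant([10., 10., 110., 210.])
#   keypoint_change_coordinate_frame(keypoints, window)
#   # expected -> [[[0.1, 0.1], [0.3, 0.2]]]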
def keypoint_prune_outside_window(keypoints, window, scope=None):
"""Prunes keypoints that fall outside a given window.
This function replaces keypoints that fall outside the given window with nan.
See also clip_to_window which clips any keypoints that fall outside the given
window.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
window outside of which the op should prune the keypoints.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
if not scope:
scope = 'PruneOutsideWindow'
with tf.name_scope(scope):
y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
valid_indices = tf.logical_and(
tf.logical_and(y >= win_y_min, y <= win_y_max),
tf.logical_and(x >= win_x_min, x <= win_x_max))
new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y))
new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x))
new_keypoints = tf.concat([new_y, new_x], 2)
return new_keypoints
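# Illustrative usage sketch (not part of this change): the second keypoint
# falls outside the window, so both of its coordinates become NaN:
#
#   keypoints = tf.constant([[[20., 30.], [500., 30.]]])
#   window = tf.constant([0., 0., 100., 100.])
#   keypoint_prune_outside_window(keypoints, window)
#   # -> [[[20., 30.], [nan, nan]]]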
def random_horizontal_flip(image,
boxes=None,
masks=None,
@@ -334,7 +400,7 @@ def resize_to_range(image,
if len(image.get_shape()) != 3:
raise ValueError('Image should be 3D tensor')
with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
with tf.name_scope('ResizeToRange'):
if image.get_shape().is_fully_defined():
new_size = _compute_new_static_size(image, min_dimension, max_dimension)
else:
@@ -389,7 +455,9 @@ def box_list_scale(boxlist, y_scale, x_scale, scope=None):
Returns:
boxlist: BoxList holding N boxes
"""
with tf.name_scope(scope, 'Scale'):
if not scope:
scope = 'Scale'
with tf.name_scope(scope):
y_scale = tf.cast(y_scale, tf.float32)
x_scale = tf.cast(x_scale, tf.float32)
y_min, x_min, y_max, x_max = tf.split(
@@ -415,7 +483,9 @@ def keypoint_scale(keypoints, y_scale, x_scale, scope=None):
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'Scale'):
if not scope:
scope = 'Scale'
with tf.name_scope(scope):
y_scale = tf.cast(y_scale, tf.float32)
x_scale = tf.cast(x_scale, tf.float32)
new_keypoints = keypoints * [[[y_scale, x_scale]]]
......