Commit d59e9237 authored by A. Unique TensorFlower's avatar A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 471573654
parent c63a5e72
@@ -18,12 +18,11 @@ import math
# Import libraries
import cv2
import numpy as np
+import tensorflow as tf


-def paste_instance_masks(masks,
-                         detected_boxes,
-                         image_height,
-                         image_width):
+def paste_instance_masks(masks: np.ndarray, detected_boxes: np.ndarray,
+                         image_height: int, image_width: int) -> np.ndarray:
"""Paste instance masks to generate the image segmentation results. """Paste instance masks to generate the image segmentation results.
Args: Args:
@@ -39,13 +38,13 @@ def paste_instance_masks(masks,
      the instance masks *pasted* on the image canvas.
  """

  def expand_boxes(boxes, scale):
-  def expand_boxes(boxes, scale):
+  def expand_boxes(boxes: np.ndarray, scale: float) -> np.ndarray:
    """Expands an array of boxes by a given scale."""
    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227  # pylint: disable=line-too-long
    # The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
    # whereas `boxes` here is in [x1, y1, w, h] form
-    w_half = boxes[:, 2] * .5
-    h_half = boxes[:, 3] * .5
+    w_half = boxes[:, 2] * 0.5
+    h_half = boxes[:, 3] * 0.5
    x_c = boxes[:, 0] + w_half
    y_c = boxes[:, 1] + h_half
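[Editor's sketch, not part of the diff: a worked example of the
center/half-size conversion above, using hypothetical values.]

    import numpy as np
    boxes = np.array([[2., 3., 4., 6.]])  # [x1, y1, w, h]
    w_half = boxes[:, 2] * 0.5            # -> [2.]
    h_half = boxes[:, 3] * 0.5            # -> [3.]
    x_c = boxes[:, 0] + w_half            # center x -> [4.]
    y_c = boxes[:, 1] + h_half            # center y -> [6.]
    # expand_boxes(boxes, scale=2.) doubles w_half and h_half while keeping
    # the center (x_c, y_c) fixed.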
@@ -104,10 +103,8 @@
  return segms


-def paste_instance_masks_v2(masks,
-                            detected_boxes,
-                            image_height,
-                            image_width):
+def paste_instance_masks_v2(masks: np.ndarray, detected_boxes: np.ndarray,
+                            image_height: int, image_width: int) -> np.ndarray:
"""Paste instance masks to generate the image segmentation (v2). """Paste instance masks to generate the image segmentation (v2).
Args: Args:
@@ -188,3 +185,65 @@ def paste_instance_masks_v2(masks,
  segms = np.array(segms)
  return segms

def bbox2mask(bbox: tf.Tensor,
              *,
              image_height: int,
              image_width: int,
              dtype: tf.DType = tf.bool) -> tf.Tensor:
  """Converts bounding boxes to bitmasks.

  Args:
    bbox: A tensor in shape (..., 4) with arbitrary numbers of batch
      dimensions, representing the absolute coordinates (ymin, xmin, ymax,
      xmax) for each bounding box.
    image_height: an integer representing the height of the image.
    image_width: an integer representing the width of the image.
    dtype: DType of the output bitmasks.

  Returns:
    A tensor in shape (..., height, width) which stores the bitmasks created
    from the bounding boxes. For example:

    >>> bbox2mask(tf.constant([[1, 2, 4, 4]]),
                  image_height=5,
                  image_width=5,
                  dtype=tf.int32)
    <tf.Tensor: shape=(1, 5, 5), dtype=int32, numpy=
    array([[[0, 0, 0, 0, 0],
            [0, 0, 1, 1, 0],
            [0, 0, 1, 1, 0],
            [0, 0, 1, 1, 0],
            [0, 0, 0, 0, 0]]], dtype=int32)>
  """
  bbox_shape = bbox.get_shape().as_list()
  if bbox_shape[-1] != 4:
    raise ValueError(
        'Expected the last dimension of `bbox` to have size == 4, but the '
        'shape of `bbox` was: %s' % bbox_shape)

  # Slice each absolute coordinate into shape (..., 1).
  ymin = bbox[..., 0:1]
  xmin = bbox[..., 1:2]
  ymax = bbox[..., 2:3]
  xmax = bbox[..., 3:4]

  # Broadcast the coordinates over the image grid:
  # (..., 1, width)
  ymin = tf.expand_dims(tf.repeat(ymin, repeats=image_width, axis=-1), axis=-2)
  # (..., height, 1)
  xmin = tf.expand_dims(tf.repeat(xmin, repeats=image_height, axis=-1), axis=-1)
  # (..., 1, width)
  ymax = tf.expand_dims(tf.repeat(ymax, repeats=image_width, axis=-1), axis=-2)
  # (..., height, 1)
  xmax = tf.expand_dims(tf.repeat(xmax, repeats=image_height, axis=-1), axis=-1)

  # Pixel-index grids in shapes (height, 1) and (1, width).
  y_grid = tf.expand_dims(tf.range(image_height, dtype=bbox.dtype), axis=-1)
  x_grid = tf.expand_dims(tf.range(image_width, dtype=bbox.dtype), axis=-2)

  # A pixel is inside the box iff it passes all four half-open interval
  # tests; each comparison broadcasts to (..., height, width).
  ymin_mask = y_grid >= ymin
  xmin_mask = x_grid >= xmin
  ymax_mask = y_grid < ymax
  xmax_mask = x_grid < xmax
  return tf.cast(ymin_mask & xmin_mask & ymax_mask & xmax_mask, dtype)
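[Editor's sketch, not part of the commit: because the box coordinates and the
pixel grids broadcast against each other, bbox2mask also accepts batched
boxes without reshaping; the values below are hypothetical.]

    boxes = tf.constant([[[0., 0., 2., 2.], [1., 1., 3., 3.]]])  # (1, 2, 4)
    masks = bbox2mask(boxes, image_height=4, image_width=4)
    # masks.shape == (1, 2, 4, 4); dtype defaults to tf.bool.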
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mask_ops.py."""
-
# Import libraries
@@ -50,6 +49,57 @@ class MaskUtilsTest(tf.test.TestCase):
        np.array(masks > 0.5, dtype=np.uint8),
        1e-5)
  def testBbox2mask(self):
    bboxes = tf.constant([[1, 2, 4, 4], [-1, -1, 3, 3], [2, 3, 6, 8],
                          [1, 1, 2, 2], [1, 1, 1, 4]])
    masks = mask_ops.bbox2mask(
        bboxes, image_height=5, image_width=6, dtype=tf.int32)
    expected_masks = tf.constant(
        [
            [  # bbox = [1, 2, 4, 4]
                [0, 0, 0, 0, 0, 0],
                [0, 0, 1, 1, 0, 0],
                [0, 0, 1, 1, 0, 0],
                [0, 0, 1, 1, 0, 0],
                [0, 0, 0, 0, 0, 0],
            ],
            [  # bbox = [-1, -1, 3, 3]
                [1, 1, 1, 0, 0, 0],
                [1, 1, 1, 0, 0, 0],
                [1, 1, 1, 0, 0, 0],
                [0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0],
            ],
            [  # bbox = [2, 3, 6, 8]
                [0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0],
                [0, 0, 0, 1, 1, 1],
                [0, 0, 0, 1, 1, 1],
                [0, 0, 0, 1, 1, 1],
            ],
            [  # bbox = [1, 1, 2, 2]
                [0, 0, 0, 0, 0, 0],
                [0, 1, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0],
            ],
            [  # bbox = [1, 1, 1, 4]
                [0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0],
            ],
        ],
        dtype=tf.int32)
    self.assertAllEqual(expected_masks, masks)
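  # (Editor's note, not part of the diff: the [1, 1, 1, 4] box above is
  # expected to yield an all-zero mask because ymin == ymax == 1, so the
  # half-open interval test `y_grid >= ymin` & `y_grid < ymax` selects no
  # rows.)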

  def testBbox2maskInvalidInput(self):
    bboxes = tf.constant([[1, 2, 4, 4, 4], [-1, -1, 3, 3, 3]])
    with self.assertRaisesRegex(ValueError, 'bbox.*size == 4'):
      mask_ops.bbox2mask(bboxes, image_height=5, image_width=6, dtype=tf.int32)

if __name__ == '__main__':
  tf.test.main()
@@ -19,7 +19,8 @@ import tensorflow as tf
_EPSILON = 1e-8

-def _feature_bilinear_interpolation(features, kernel_y, kernel_x):
+def _feature_bilinear_interpolation(features: tf.Tensor, kernel_y: tf.Tensor,
+                                    kernel_x: tf.Tensor) -> tf.Tensor:
"""Feature bilinear interpolation. """Feature bilinear interpolation.
The RoIAlign feature f can be computed by bilinear interpolation The RoIAlign feature f can be computed by bilinear interpolation
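[Editor's sketch, not part of the diff: the bilinear form referenced above.
Assuming the four neighboring feature points are f00, f01, f10, f11 and the
kernels are kernel_y = [hy, ly] and kernel_x = [hx, lx], the interpolated
value is

    f(y, x) = hy*hx*f00 + hy*lx*f01 + ly*hx*f10 + ly*lx*f11,

i.e. a kernel-weighted sum of the four neighbors.]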
@@ -67,8 +68,12 @@ def _feature_bilinear_interpolation(features, kernel_y, kernel_x):
  return features

-def _compute_grid_positions(boxes, boundaries, output_size, sample_offset):
-  """Computes the grid position w.r.t. the corresponding feature map.
+def _compute_grid_positions(
+    boxes: tf.Tensor, boundaries: tf.Tensor, output_size: int,
+    sample_offset: float) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
+  """Computes the grid position w.r.t.
+
+  the corresponding feature map.

  Args:
    boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
@@ -135,10 +140,10 @@ def _compute_grid_positions(boxes, boundaries, output_size, sample_offset):
  return kernel_y, kernel_x, box_gridy0y1, box_gridx0x1

-def multilevel_crop_and_resize(features,
-                               boxes,
-                               output_size=7,
-                               sample_offset=0.5):
+def multilevel_crop_and_resize(features: dict[str, tf.Tensor],
+                               boxes: tf.Tensor,
+                               output_size: int = 7,
+                               sample_offset: float = 0.5) -> tf.Tensor:
"""Crop and resize on multilevel feature pyramid. """Crop and resize on multilevel feature pyramid.
Generate the (output_size, output_size) set of pixels for each input box Generate the (output_size, output_size) set of pixels for each input box
@@ -282,13 +287,13 @@ def multilevel_crop_and_resize(features,
  return features_per_box

-def _selective_crop_and_resize(features,
-                               boxes,
-                               box_levels,
-                               boundaries,
-                               output_size=7,
-                               sample_offset=0.5,
-                               use_einsum_gather=False):
+def _selective_crop_and_resize(features: tf.Tensor,
+                               boxes: tf.Tensor,
+                               box_levels: tf.Tensor,
+                               boundaries: tf.Tensor,
+                               output_size: int = 7,
+                               sample_offset: float = 0.5,
+                               use_einsum_gather: bool = False) -> tf.Tensor:
"""Crop and resize boxes on a set of feature maps. """Crop and resize boxes on a set of feature maps.
Given multiple features maps indexed by different levels, and a set of boxes Given multiple features maps indexed by different levels, and a set of boxes
@@ -434,12 +439,12 @@ def _selective_crop_and_resize(features,
  return features_per_box

-def crop_mask_in_target_box(masks,
-                            boxes,
-                            target_boxes,
-                            output_size,
-                            sample_offset=0,
-                            use_einsum=True):
+def crop_mask_in_target_box(masks: tf.Tensor,
+                            boxes: tf.Tensor,
+                            target_boxes: tf.Tensor,
+                            output_size: int,
+                            sample_offset: float = 0.0,
+                            use_einsum: bool = True) -> tf.Tensor:
"""Crop masks in target boxes. """Crop masks in target boxes.
Args: Args:
@@ -515,7 +520,9 @@ def crop_mask_in_target_box(masks,
  return cropped_masks

-def nearest_upsampling(data, scale, use_keras_layer=False):
+def nearest_upsampling(data: tf.Tensor,
+                       scale: int,
+                       use_keras_layer: bool = False) -> tf.Tensor:
"""Nearest neighbor upsampling implementation. """Nearest neighbor upsampling implementation.
Args: Args:
@@ -542,3 +549,54 @@ def nearest_upsampling(data, scale, use_keras_layer=False):
  data = tf.tile(
      tf.reshape(data, [bs, h, 1, w, 1, c]), [1, 1, scale, 1, scale, 1])
  return tf.reshape(data, [bs, h * scale, w * scale, c])
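[Editor's sketch, not part of the diff: with the default use_keras_layer=False
path shown above, the tile/reshape pair turns each input pixel into a
scale x scale block; the values below are hypothetical.]

    x = tf.reshape(tf.range(4.), [1, 2, 2, 1])  # a 2x2, 1-channel image
    y = nearest_upsampling(x, scale=2)          # shape (1, 4, 4, 1)
    # y[0, :, :, 0] ==
    # [[0., 0., 1., 1.],
    #  [0., 0., 1., 1.],
    #  [2., 2., 3., 3.],
    #  [2., 2., 3., 3.]]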
def _gather_rows_from_matrix(input_matrix: tf.Tensor,
                             row_indices: tf.Tensor) -> tf.Tensor:
  """Gathers rows from the input matrix (2-D tensor).

  This operation is equivalent to tf.gather(input_matrix, row_indices), but
  is implemented via sparse matrix multiplication.

  Args:
    input_matrix: A 2-D tensor in shape (input_h, input_w) from which to
      gather values. The shape must be 2-D, since sparse matrix
      multiplication is currently only supported on 2-D matrices.
    row_indices: A 1-D int tensor in shape (output_h) which stores the row
      indices of the input.

  Returns:
    A tensor in shape (output_h, input_w) which stores the gathered rows.
  """
  input_matrix_shape = input_matrix.get_shape().as_list()
  if len(input_matrix_shape) != 2:
    raise ValueError(
        'Expected the input_matrix tensor (input_h, input_w) to have '
        'rank == 2, was: %s' % input_matrix_shape)
  row_indices_shape = row_indices.get_shape().as_list()
  if len(row_indices_shape) != 1:
    raise ValueError(
        'Expected the row_indices tensor (output_h) to have rank == 1, '
        'was: %s' % row_indices_shape)

  # One-hot selector matrix: (output_h, input_h).
  indices_one_hot = tf.one_hot(
      row_indices, depth=input_matrix_shape[0], dtype=input_matrix.dtype)
  # Matrix multiplication: (output_h, input_h) x (input_h, input_w)
  # -> (output_h, input_w)
  return tf.linalg.matmul(indices_one_hot, input_matrix, a_is_sparse=True)
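[Editor's sketch, not part of the diff: the one-hot matmul above is
row-for-row equivalent to tf.gather; `m` and `idx` are hypothetical example
values.]

    m = tf.constant([[1., 2.], [3., 4.], [5., 6.]])
    idx = tf.constant([2, 0])
    _gather_rows_from_matrix(m, idx)
    # -> [[5., 6.], [1., 2.]], the same result as tf.gather(m, idx)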

def bilinear_resize_to_bbox(images: tf.Tensor, bbox: tf.Tensor,
                            output_size: tf.Tensor) -> tf.Tensor:
  # TODO(b/241944792): Implement in follow-up CLs
  raise NotImplementedError


def bilinear_resize_with_crop_and_pad(images: tf.Tensor, *,
                                      rescale_size: tf.Tensor,
                                      crop_offset: tf.Tensor,
                                      crop_size: tf.Tensor,
                                      output_size: tf.Tensor) -> tf.Tensor:
  # TODO(b/241944792): Implement in follow-up CLs
  raise NotImplementedError