Commit 1491094f authored by Zhichao Lu, committed by lzc5123016
Browse files

Adds a matrix-multiplication based implementation of the tf.image.crop_and_resize op.

PiperOrigin-RevId: 187693682
parent a4ffb34d
......@@ -820,3 +820,127 @@ def matmul_gather_on_zeroth_axis(params, indices, scope=None):
gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
return tf.reshape(gathered_result_flattened,
tf.stack(indices_shape + params_shape[1:]))
def matmul_crop_and_resize(image, boxes, crop_size, scope=None):
  """Matrix multiplication based implementation of the crop and resize op.

  Extracts crops from the input image tensor and bilinearly resizes them
  (possibly with aspect ratio change) to a common output size specified by
  crop_size. This is more general than the crop_to_bounding_box op which
  extracts a fixed size slice from the input image and does not allow
  resizing or aspect ratio change.

  Returns a tensor with crops from the input image at positions defined at
  the bounding box locations in boxes. The cropped boxes are all resized
  (with bilinear interpolation) to a fixed size = `[crop_height, crop_width]`.
  The result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`.

  Running time complexity:
    O((# channels) * (# boxes) * (crop_size)^2 * M), where M is the number
    of pixels of the longer edge of the image.

  Note that this operation is meant to replicate the behavior of the standard
  tf.image.crop_and_resize operation but there are a few differences.
  Specifically:
    1) The extrapolation value (the values that are interpolated from outside
      the bounds of the image window) is always zero.
    2) Only XLA supported operations are used (e.g., matrix multiplication).
    3) There is no `box_indices` argument --- to run this op on multiple
      images, one must currently call this op independently on each image.
    4) All shapes and the `crop_size` parameter are assumed to be statically
      defined. Moreover, the number of boxes must be strictly nonzero.

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 4-D tensor of shape `[1, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 2-D tensor of shape `[num_boxes, 4]`. Each row specifies the
      coordinates of a box in the (single) input image, in normalized
      coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of `y` is
      mapped to the image coordinate at `y * (image_height - 1)`, so that the
      `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow
      y1 > y2, in which case the sampled crop is an up-down flipped version of
      the original image. The width dimension is treated similarly.
      Normalized coordinates outside the `[0, 1]` range are allowed, in which
      case the image is treated as zero outside of its bounds.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    scope: A name for the operation (optional).

  Returns:
    A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`

  Raises:
    ValueError: if image tensor does not have shape
      `[1, image_height, image_width, depth]` and all dimensions statically
      defined.
    ValueError: if boxes tensor does not have shape `[num_boxes, 4]` where
      num_boxes > 0.
    ValueError: if crop_size is not a list of two positive integers
  """
  image_shape_error = ('image should have shape '
                       '`[1, image_height, image_width, depth]`')
  if not isinstance(crop_size, list) or len(crop_size) != 2:
    raise ValueError('`crop_size` must be a list of length 2')

  img_shape = image.shape.as_list()
  boxes_shape = boxes.shape.as_list()
  # Check the rank explicitly so that a wrongly ranked image produces the
  # documented error instead of a tuple-unpacking failure.
  if len(img_shape) != 4:
    raise ValueError(image_shape_error)
  if not all(isinstance(dim, int)
             for dim in img_shape + crop_size + boxes_shape):
    raise ValueError('all input shapes must be statically defined')
  if len(boxes_shape) != 2 or boxes_shape[1] != 4:
    raise ValueError('`boxes` should have shape `[num_boxes, 4]`')
  # Only a single image is supported: every box is matched against the one
  # image by tiling it `num_crops` times below, so batch > 1 must be rejected.
  if img_shape[0] != 1:
    raise ValueError(image_shape_error)
  _, img_height, img_width, _ = img_shape

  num_crops = boxes_shape[0]
  if num_crops <= 0:
    raise ValueError('number of boxes must be > 0')
  if not (crop_size[0] > 0 and crop_size[1] > 0):
    raise ValueError('`crop_size` must be a list of two positive integers.')

  def _lin_space_weights(num, img_size):
    """Returns weights mapping normalized box edges to pixel grid positions.

    For a box edge pair (start, stop) in normalized coordinates, the pixel
    position of the i-th of `num` sample points is
    `start * start_weights[i] + stop * stop_weights[i]`.

    Args:
      num: number of sample points along this dimension (positive int).
      img_size: image size in pixels along this dimension.

    Returns:
      A `(start_weights, stop_weights)` tuple of float32 constants.
    """
    if num > 1:
      # Sample points are evenly spaced from the start edge to the stop edge.
      alpha = (img_size - 1) / float(num - 1)
      indices = np.reshape(np.arange(num), (1, num))
      start_weights = alpha * (num - 1 - indices)
      stop_weights = alpha * indices
    else:
      # A single sample point sits at the midpoint of the two edges.
      start_weights = num * [.5 * (img_size - 1)]
      stop_weights = num * [.5 * (img_size - 1)]
    return (tf.constant(start_weights, dtype=tf.float32),
            tf.constant(stop_weights, dtype=tf.float32))

  with tf.name_scope(scope, 'MatMulCropAndResize'):
    y1_weights, y2_weights = _lin_space_weights(crop_size[0], img_height)
    x1_weights, x2_weights = _lin_space_weights(crop_size[1], img_width)
    [y1, x1, y2, x2] = tf.split(value=boxes, num_or_size_splits=4, axis=1)

    # Pixel centers of input image and grid points along height and width
    image_idx_h = tf.constant(
        np.reshape(np.arange(img_height), (1, 1, img_height)), dtype=tf.float32)
    image_idx_w = tf.constant(
        np.reshape(np.arange(img_width), (1, 1, img_width)), dtype=tf.float32)
    grid_pos_h = tf.expand_dims(y1 * y1_weights + y2 * y2_weights, 2)
    grid_pos_w = tf.expand_dims(x1 * x1_weights + x2 * x2_weights, 2)

    # Create kernel matrices of pairwise kernel evaluations between pixel
    # centers of image and grid points. The triangular kernel
    # relu(1 - |distance|) yields bilinear interpolation weights and is zero
    # for pixels further than one pixel away from a grid point.
    kernel_h = tf.nn.relu(1 - tf.abs(image_idx_h - grid_pos_h))
    kernel_w = tf.nn.relu(1 - tf.abs(image_idx_w - grid_pos_w))

    # TODO(jonathanhuang): investigate whether all channels can be processed
    # without the explicit unstack --- possibly with a permute and map_fn call.
    result_channels = []
    for channel in tf.unstack(image, axis=3):
      # `channel` has shape [1, height, width]; tile it so that each box gets
      # its own copy for the batched matmul.
      tiled_channel = tf.tile(channel, [num_crops, 1, 1])
      result_channels.append(
          tf.matmul(
              tf.matmul(kernel_h, tiled_channel), kernel_w, transpose_b=True))
    return tf.stack(result_channels, axis=3)
......@@ -1171,12 +1171,15 @@ class NearestNeighborUpsamplingTest(test_case.TestCase):
def graph_fn(inputs):
custom_op_output = ops.nearest_neighbor_upsampling(inputs, scale=2)
tf_op_output = tf.image.resize_images(
inputs, [4, 4], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
return (custom_op_output, tf_op_output)
inputs = np.reshape(np.arange(2**4), [2, 2, 2, 2])
(custom_op_output, tf_op_output) = self.execute(graph_fn, [inputs])
self.assertAllClose(custom_op_output, tf_op_output)
return custom_op_output
inputs = np.reshape(np.arange(4).astype(np.float32), [1, 2, 2, 1])
custom_op_output = self.execute(graph_fn, [inputs])
expected_output = [[[[0], [0], [1], [1]],
[[0], [0], [1], [1]],
[[2], [2], [3], [3]],
[[2], [2], [3], [3]]]]
self.assertAllClose(custom_op_output, expected_output)
class MatmulGatherOnZerothAxis(test_case.TestCase):
......@@ -1190,7 +1193,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[5, 6, 7, 8],
[9, 10, 11, 12],
[0, 1, 0, 0]], dtype=np.float32)
indices = np.array([2, 2, 1])
indices = np.array([2, 2, 1], dtype=np.int32)
expected_output = np.array([[9, 10, 11, 12], [9, 10, 11, 12], [5, 6, 7, 8]])
gather_output = self.execute(graph_fn, [params, indices])
self.assertAllClose(gather_output, expected_output)
......@@ -1204,7 +1207,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[[5, 6], [7, 8]],
[[9, 10], [11, 12]],
[[0, 1], [0, 0]]], dtype=np.float32)
indices = np.array([0, 3, 1])
indices = np.array([0, 3, 1], dtype=np.int32)
expected_output = np.array([[[1, 2], [3, 4]],
[[0, 1], [0, 0]],
[[5, 6], [7, 8]]])
......@@ -1220,7 +1223,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[5, 6, 7, 8],
[9, 10, 11, 12],
[0, 1, 0, 0]], dtype=np.float32)
indices = np.array([0, 0, 0, 0, 0, 0])
indices = np.array([0, 0, 0, 0, 0, 0], dtype=np.int32)
expected_output = np.array(6*[[1, 2, 3, 4]])
gather_output = self.execute(graph_fn, [params, indices])
self.assertAllClose(gather_output, expected_output)
......@@ -1241,5 +1244,109 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
params_placeholder: params, indices_placeholder: indices})
self.assertAllClose(gather_output, expected_output)
class OpsTestMatMulCropAndResize(test_case.TestCase):
  """Compares `ops.matmul_crop_and_resize` output to hand-computed crops."""

  def _crop_and_resize(self, image, boxes, crop_size):
    """Builds the op for the given inputs and returns its evaluated output."""
    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=crop_size)
    return self.execute(graph_fn, [image, boxes])

  def testMatMulCropAndResize2x2To1x1(self):
    """A 1x1 crop of the whole image samples the image center."""
    pixels = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
    whole_image = np.array([[0, 0, 1, 1]], dtype=np.float32)
    actual = self._crop_and_resize(pixels, whole_image, [1, 1])
    self.assertAllClose(actual, [[[[2.5]]]])

  def testMatMulCropAndResize2x2To1x1Flipped(self):
    """Flipping the box does not change a single center sample."""
    pixels = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
    flipped_box = np.array([[1, 1, 0, 0]], dtype=np.float32)
    actual = self._crop_and_resize(pixels, flipped_box, [1, 1])
    self.assertAllClose(actual, [[[[2.5]]]])

  def testMatMulCropAndResize2x2To3x3(self):
    """Upsampling 2x2 to 3x3 interpolates the in-between values."""
    pixels = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
    whole_image = np.array([[0, 0, 1, 1]], dtype=np.float32)
    wanted = [[[[1.0], [1.5], [2.0]],
               [[2.0], [2.5], [3.0]],
               [[3.0], [3.5], [4.0]]]]
    actual = self._crop_and_resize(pixels, whole_image, [3, 3])
    self.assertAllClose(actual, wanted)

  def testMatMulCropAndResize2x2To3x3Flipped(self):
    """A box with y1 > y2 and x1 > x2 yields the mirrored upsampled crop."""
    pixels = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
    flipped_box = np.array([[1, 1, 0, 0]], dtype=np.float32)
    wanted = [[[[4.0], [3.5], [3.0]],
               [[3.0], [2.5], [2.0]],
               [[2.0], [1.5], [1.0]]]]
    actual = self._crop_and_resize(pixels, flipped_box, [3, 3])
    self.assertAllClose(actual, wanted)

  def testMatMulCropAndResize3x3To2x2(self):
    """Two boxes on a 3x3 image each produce a 2x2 crop of corner samples."""
    pixels = np.array([[[[1], [2], [3]],
                        [[4], [5], [6]],
                        [[7], [8], [9]]]], dtype=np.float32)
    two_boxes = np.array([[0, 0, 1, 1],
                          [0, 0, .5, .5]], dtype=np.float32)
    wanted = [[[[1], [3]], [[7], [9]]],
              [[[1], [2]], [[4], [5]]]]
    actual = self._crop_and_resize(pixels, two_boxes, [2, 2])
    self.assertAllClose(actual, wanted)

  def testMatMulCropAndResize3x3To2x2MultiChannel(self):
    """Each channel of a two-channel image is cropped independently."""
    pixels = np.array([[[[1, 0], [2, 1], [3, 2]],
                        [[4, 3], [5, 4], [6, 5]],
                        [[7, 6], [8, 7], [9, 8]]]], dtype=np.float32)
    two_boxes = np.array([[0, 0, 1, 1],
                          [0, 0, .5, .5]], dtype=np.float32)
    wanted = [[[[1, 0], [3, 2]], [[7, 6], [9, 8]]],
              [[[1, 0], [2, 1]], [[4, 3], [5, 4]]]]
    actual = self._crop_and_resize(pixels, two_boxes, [2, 2])
    self.assertAllClose(actual, wanted)

  def testMatMulCropAndResize3x3To2x2Flipped(self):
    """Flipped boxes on a 3x3 image give mirrored 2x2 crops."""
    pixels = np.array([[[[1], [2], [3]],
                        [[4], [5], [6]],
                        [[7], [8], [9]]]], dtype=np.float32)
    flipped_boxes = np.array([[1, 1, 0, 0],
                              [.5, .5, 0, 0]], dtype=np.float32)
    wanted = [[[[9], [7]], [[3], [1]]],
              [[[5], [4]], [[2], [1]]]]
    actual = self._crop_and_resize(pixels, flipped_boxes, [2, 2])
    self.assertAllClose(actual, wanted)

  def testInvalidInputShape(self):
    """A rank-3 image (no batch dimension) is rejected with ValueError."""
    rank3_image = tf.constant([[[1], [2]], [[3], [4]]], dtype=tf.float32)
    box = tf.constant([[-1, -1, 1, 1]], dtype=tf.float32)
    with self.assertRaises(ValueError):
      ops.matmul_crop_and_resize(rank3_image, box, [4, 4])
# Run the test cases through the TensorFlow test runner when this file is
# executed as a script.
if __name__ == '__main__':
  tf.test.main()
......@@ -47,7 +47,9 @@ class TestCase(tf.test.TestCase):
materialized_results = sess.run(tpu_computation,
feed_dict=dict(zip(placeholders, inputs)))
sess.run(tpu.shutdown_system())
if len(materialized_results) == 1:
if (len(materialized_results) == 1
and (isinstance(materialized_results, list)
or isinstance(materialized_results, tuple))):
materialized_results = materialized_results[0]
return materialized_results
......@@ -70,7 +72,9 @@ class TestCase(tf.test.TestCase):
tf.local_variables_initializer()])
materialized_results = sess.run(results, feed_dict=dict(zip(placeholders,
inputs)))
if len(materialized_results) == 1:
if (len(materialized_results) == 1
and (isinstance(materialized_results, list)
or isinstance(materialized_results, tuple))):
materialized_results = materialized_results[0]
return materialized_results
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment