Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
1491094f
Commit
1491094f
authored
Mar 02, 2018
by
Zhichao Lu
Committed by
lzc5123016
Mar 03, 2018
Browse files
Adds a matrix-multiplication based implementation of the tf.image.crop_and_resize op.
PiperOrigin-RevId: 187693682
parent
a4ffb34d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
246 additions
and
11 deletions
+246
-11
research/object_detection/utils/ops.py
research/object_detection/utils/ops.py
+124
-0
research/object_detection/utils/ops_test.py
research/object_detection/utils/ops_test.py
+116
-9
research/object_detection/utils/test_case.py
research/object_detection/utils/test_case.py
+6
-2
No files found.
research/object_detection/utils/ops.py
View file @
1491094f
...
...
@@ -820,3 +820,127 @@ def matmul_gather_on_zeroth_axis(params, indices, scope=None):
gathered_result_flattened
=
tf
.
matmul
(
indicator_matrix
,
params2d
)
return
tf
.
reshape
(
gathered_result_flattened
,
tf
.
stack
(
indices_shape
+
params_shape
[
1
:]))
def matmul_crop_and_resize(image, boxes, crop_size, scope=None):
  """Matrix multiplication based implementation of the crop and resize op.

  Extracts crops from the input image tensor and bilinearly resizes them
  (possibly with aspect ratio change) to a common output size specified by
  crop_size. This is more general than the crop_to_bounding_box op which
  extracts a fixed size slice from the input image and does not allow
  resizing or aspect ratio change.

  Returns a tensor with crops from the input image at positions defined at
  the bounding box locations in boxes. The cropped boxes are all resized
  (with bilinear interpolation) to a fixed size = `[crop_height, crop_width]`.
  The result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`.

  Running time complexity:
    O((# channels) * (# boxes) * (crop_size)^2 * M), where M is the number
    of pixels of the longer edge of the image.

  Note that this operation is meant to replicate the behavior of the standard
  tf.image.crop_and_resize operation but there are a few differences.
  Specifically:
    1) The extrapolation value (the values that are interpolated from outside
       the bounds of the image window) is always zero.
    2) Only XLA supported operations are used (e.g., matrix multiplication).
    3) There is no `box_indices` argument --- to run this op on multiple
       images, one must currently call this op independently on each image.
    4) All shapes and the `crop_size` parameter are assumed to be statically
       defined.  Moreover, the number of boxes must be strictly nonzero.

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 4-D tensor of shape `[1, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
      specifies the coordinates of a box and is specified in normalized
      coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of `y` is
      mapped to the image coordinate at `y * (image_height - 1)`, so the
      `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow
      `y1 > y2`, in which case the sampled crop is an up-down flipped version
      of the original image. The width dimension is treated similarly.
      Normalized coordinates outside the `[0, 1]` range are allowed; values
      sampled outside the image are zero (see difference 1 above).
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    scope: A name for the operation (optional).

  Returns:
    A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`

  Raises:
    ValueError: if image tensor does not have shape
      `[1, image_height, image_width, depth]` with all dimensions statically
      defined.
    ValueError: if boxes tensor does not have shape `[num_boxes, 4]` where
      num_boxes > 0.
    ValueError: if crop_size is not a list of two positive integers.
  """
  img_shape = image.shape.as_list()
  boxes_shape = boxes.shape.as_list()

  # All validation happens at graph-construction time; this op requires
  # fully statically defined shapes (see docstring note 4).
  if not isinstance(crop_size, list) or len(crop_size) != 2:
    raise ValueError('`crop_size` must be a list of length 2')
  dimensions = img_shape + crop_size + boxes_shape
  if not all(isinstance(dim, int) for dim in dimensions):
    raise ValueError('all input shapes must be statically defined')
  if len(boxes_shape) != 2 or boxes_shape[1] != 4:
    raise ValueError('`boxes` should have shape `[num_boxes, 4]`')
  # Bug fix: the original condition used `and`, which accepted 4-D images
  # with batch size > 1 even though the computation below (tf.tile of a
  # single image) assumes batch size exactly 1.
  if len(img_shape) != 4 or img_shape[0] != 1:
    raise ValueError('image should have shape '
                     '`[1, image_height, image_width, depth]`')
  num_crops = boxes_shape[0]
  if not num_crops > 0:
    raise ValueError('number of boxes must be > 0')
  if not (crop_size[0] > 0 and crop_size[1] > 0):
    raise ValueError('`crop_size` must be a list of two positive integers.')
  # Unpack only after the shape has been validated, so a malformed image
  # raises the documented error message rather than an unpacking error.
  _, img_height, img_width, _ = img_shape

  def _lin_space_weights(num, img_size):
    """Returns a pair of [1, num] float32 interpolation-weight tensors.

    Grid position i along an axis equals y1 * start[i] + y2 * stop[i] for
    normalized box edges (y1, y2); note start[i] + stop[i] == img_size - 1.
    """
    if num > 1:
      alpha = (img_size - 1) / float(num - 1)
      indices = np.reshape(np.arange(num), (1, num))
      start_weights = alpha * (num - 1 - indices)
      stop_weights = alpha * indices
    else:
      # A single output sample is taken at the midpoint of the box.
      start_weights = num * [.5 * (img_size - 1)]
      stop_weights = num * [.5 * (img_size - 1)]
    return (tf.constant(start_weights, dtype=tf.float32),
            tf.constant(stop_weights, dtype=tf.float32))

  with tf.name_scope(scope, 'MatMulCropAndResize'):
    y1_weights, y2_weights = _lin_space_weights(crop_size[0], img_height)
    x1_weights, x2_weights = _lin_space_weights(crop_size[1], img_width)
    [y1, x1, y2, x2] = tf.split(value=boxes, num_or_size_splits=4, axis=1)

    # Pixel centers of input image and grid points along height and width.
    image_idx_h = tf.constant(
        np.reshape(np.arange(img_height), (1, 1, img_height)),
        dtype=tf.float32)
    image_idx_w = tf.constant(
        np.reshape(np.arange(img_width), (1, 1, img_width)),
        dtype=tf.float32)
    grid_pos_h = tf.expand_dims(y1 * y1_weights + y2 * y2_weights, 2)
    grid_pos_w = tf.expand_dims(x1 * x1_weights + x2 * x2_weights, 2)

    # Create kernel matrices of pairwise kernel evaluations between pixel
    # centers of image and grid points (triangular kernel => bilinear
    # interpolation, with zero weight outside the image bounds).
    kernel_h = tf.nn.relu(1 - tf.abs(image_idx_h - grid_pos_h))
    kernel_w = tf.nn.relu(1 - tf.abs(image_idx_w - grid_pos_w))

    # TODO(jonathanhuang): investigate whether all channels can be processed
    # without the explicit unstack --- possibly with a permute and map_fn call.
    result_channels = []
    for channel in tf.unstack(image, axis=3):
      result_channels.append(
          tf.matmul(
              tf.matmul(kernel_h, tf.tile(channel, [num_crops, 1, 1])),
              kernel_w,
              transpose_b=True))
    return tf.stack(result_channels, axis=3)
research/object_detection/utils/ops_test.py
View file @
1491094f
...
...
@@ -1171,12 +1171,15 @@ class NearestNeighborUpsamplingTest(test_case.TestCase):
def
graph_fn
(
inputs
):
custom_op_output
=
ops
.
nearest_neighbor_upsampling
(
inputs
,
scale
=
2
)
tf_op_output
=
tf
.
image
.
resize_images
(
inputs
,
[
4
,
4
],
method
=
tf
.
image
.
ResizeMethod
.
NEAREST_NEIGHBOR
)
return
(
custom_op_output
,
tf_op_output
)
inputs
=
np
.
reshape
(
np
.
arange
(
2
**
4
),
[
2
,
2
,
2
,
2
])
(
custom_op_output
,
tf_op_output
)
=
self
.
execute
(
graph_fn
,
[
inputs
])
self
.
assertAllClose
(
custom_op_output
,
tf_op_output
)
return
custom_op_output
inputs
=
np
.
reshape
(
np
.
arange
(
4
).
astype
(
np
.
float32
),
[
1
,
2
,
2
,
1
])
custom_op_output
=
self
.
execute
(
graph_fn
,
[
inputs
])
expected_output
=
[[[[
0
],
[
0
],
[
1
],
[
1
]],
[[
0
],
[
0
],
[
1
],
[
1
]],
[[
2
],
[
2
],
[
3
],
[
3
]],
[[
2
],
[
2
],
[
3
],
[
3
]]]]
self
.
assertAllClose
(
custom_op_output
,
expected_output
)
class
MatmulGatherOnZerothAxis
(
test_case
.
TestCase
):
...
...
@@ -1190,7 +1193,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[
5
,
6
,
7
,
8
],
[
9
,
10
,
11
,
12
],
[
0
,
1
,
0
,
0
]],
dtype
=
np
.
float32
)
indices
=
np
.
array
([
2
,
2
,
1
])
indices
=
np
.
array
([
2
,
2
,
1
]
,
dtype
=
np
.
int32
)
expected_output
=
np
.
array
([[
9
,
10
,
11
,
12
],
[
9
,
10
,
11
,
12
],
[
5
,
6
,
7
,
8
]])
gather_output
=
self
.
execute
(
graph_fn
,
[
params
,
indices
])
self
.
assertAllClose
(
gather_output
,
expected_output
)
...
...
@@ -1204,7 +1207,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[[
5
,
6
],
[
7
,
8
]],
[[
9
,
10
],
[
11
,
12
]],
[[
0
,
1
],
[
0
,
0
]]],
dtype
=
np
.
float32
)
indices
=
np
.
array
([
0
,
3
,
1
])
indices
=
np
.
array
([
0
,
3
,
1
]
,
dtype
=
np
.
int32
)
expected_output
=
np
.
array
([[[
1
,
2
],
[
3
,
4
]],
[[
0
,
1
],
[
0
,
0
]],
[[
5
,
6
],
[
7
,
8
]]])
...
...
@@ -1220,7 +1223,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[
5
,
6
,
7
,
8
],
[
9
,
10
,
11
,
12
],
[
0
,
1
,
0
,
0
]],
dtype
=
np
.
float32
)
indices
=
np
.
array
([
0
,
0
,
0
,
0
,
0
,
0
])
indices
=
np
.
array
([
0
,
0
,
0
,
0
,
0
,
0
]
,
dtype
=
np
.
int32
)
expected_output
=
np
.
array
(
6
*
[[
1
,
2
,
3
,
4
]])
gather_output
=
self
.
execute
(
graph_fn
,
[
params
,
indices
])
self
.
assertAllClose
(
gather_output
,
expected_output
)
...
...
@@ -1241,5 +1244,109 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
params_placeholder
:
params
,
indices_placeholder
:
indices
})
self
.
assertAllClose
(
gather_output
,
expected_output
)
class OpsTestMatMulCropAndResize(test_case.TestCase):
  """Tests for ops.matmul_crop_and_resize.

  Each test crops a small constant image with normalized box coordinates
  and compares the result against hand-computed bilinear interpolation
  values. "Flipped" variants use y1 > y2 / x1 > x2, which the op defines
  as producing an up-down / left-right flipped crop.
  """

  def testMatMulCropAndResize2x2To1x1(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[1, 1])

    image = np.array([[[[1], [2]],
                       [[3], [4]]]], dtype=np.float32)
    # Full-image box; a 1x1 crop samples the box midpoint, i.e. the mean
    # of all four pixels: (1 + 2 + 3 + 4) / 4 = 2.5.
    boxes = np.array([[0, 0, 1, 1]], dtype=np.float32)
    expected_output = [[[[2.5]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize2x2To1x1Flipped(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[1, 1])

    image = np.array([[[[1], [2]],
                       [[3], [4]]]], dtype=np.float32)
    # Flipped full-image box; the midpoint sample is unaffected by the flip.
    boxes = np.array([[1, 1, 0, 0]], dtype=np.float32)
    expected_output = [[[[2.5]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize2x2To3x3(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[3, 3])

    image = np.array([[[[1], [2]],
                       [[3], [4]]]], dtype=np.float32)
    # Upsampling a 2x2 image to 3x3: corners are the original pixels, the
    # other grid points are bilinear midpoints.
    boxes = np.array([[0, 0, 1, 1]], dtype=np.float32)
    expected_output = [[[[1.0], [1.5], [2.0]],
                        [[2.0], [2.5], [3.0]],
                        [[3.0], [3.5], [4.0]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize2x2To3x3Flipped(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[3, 3])

    image = np.array([[[[1], [2]],
                       [[3], [4]]]], dtype=np.float32)
    # Same as the previous test but with the box reversed in both axes,
    # so the expected crop is the 180-degree rotation of the 3x3 result.
    boxes = np.array([[1, 1, 0, 0]], dtype=np.float32)
    expected_output = [[[[4.0], [3.5], [3.0]],
                        [[3.0], [2.5], [2.0]],
                        [[2.0], [1.5], [1.0]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize3x3To2x2(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])

    image = np.array([[[[1], [2], [3]],
                       [[4], [5], [6]],
                       [[7], [8], [9]]]], dtype=np.float32)
    # Two boxes: the full image (2x2 crop hits the four corners) and the
    # top-left quadrant (grid points land exactly on pixel centers).
    boxes = np.array([[0, 0, 1, 1],
                      [0, 0, .5, .5]], dtype=np.float32)
    expected_output = [[[[1], [3]], [[7], [9]]],
                       [[[1], [2]], [[4], [5]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize3x3To2x2MultiChannel(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])

    # Same geometry as the single-channel 3x3->2x2 test; the second
    # channel (first channel minus one) verifies channels are cropped
    # independently.
    image = np.array([[[[1, 0], [2, 1], [3, 2]],
                       [[4, 3], [5, 4], [6, 5]],
                       [[7, 6], [8, 7], [9, 8]]]], dtype=np.float32)
    boxes = np.array([[0, 0, 1, 1],
                      [0, 0, .5, .5]], dtype=np.float32)
    expected_output = [[[[1, 0], [3, 2]], [[7, 6], [9, 8]]],
                       [[[1, 0], [2, 1]], [[4, 3], [5, 4]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize3x3To2x2Flipped(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])

    image = np.array([[[[1], [2], [3]],
                       [[4], [5], [6]],
                       [[7], [8], [9]]]], dtype=np.float32)
    # Reversed boxes: expected crops are the flipped versions of the
    # testMatMulCropAndResize3x3To2x2 outputs.
    boxes = np.array([[1, 1, 0, 0],
                      [.5, .5, 0, 0]], dtype=np.float32)
    expected_output = [[[[9], [7]], [[3], [1]]],
                       [[[5], [4]], [[2], [1]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testInvalidInputShape(self):
    # A 3-D image (missing the leading batch dimension) must be rejected
    # at graph-construction time with a ValueError.
    image = tf.constant([[[1], [2]],
                         [[3], [4]]], dtype=tf.float32)
    boxes = tf.constant([[-1, -1, 1, 1]], dtype=tf.float32)
    crop_size = [4, 4]
    with self.assertRaises(ValueError):
      _ = ops.matmul_crop_and_resize(image, boxes, crop_size)
# Standard TensorFlow test entry point: discovers and runs the test cases
# above when this file is executed directly.
if __name__ == '__main__':
  tf.test.main()
research/object_detection/utils/test_case.py
View file @
1491094f
...
...
@@ -47,7 +47,9 @@ class TestCase(tf.test.TestCase):
materialized_results
=
sess
.
run
(
tpu_computation
,
feed_dict
=
dict
(
zip
(
placeholders
,
inputs
)))
sess
.
run
(
tpu
.
shutdown_system
())
if
len
(
materialized_results
)
==
1
:
if
(
len
(
materialized_results
)
==
1
and
(
isinstance
(
materialized_results
,
list
)
or
isinstance
(
materialized_results
,
tuple
))):
materialized_results
=
materialized_results
[
0
]
return
materialized_results
...
...
@@ -70,7 +72,9 @@ class TestCase(tf.test.TestCase):
tf
.
local_variables_initializer
()])
materialized_results
=
sess
.
run
(
results
,
feed_dict
=
dict
(
zip
(
placeholders
,
inputs
)))
if
len
(
materialized_results
)
==
1
:
if
(
len
(
materialized_results
)
==
1
and
(
isinstance
(
materialized_results
,
list
)
or
isinstance
(
materialized_results
,
tuple
))):
materialized_results
=
materialized_results
[
0
]
return
materialized_results
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment