Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
d3d2ad3d
Commit
d3d2ad3d
authored
Jul 20, 2020
by
TF Object Detection Team
Browse files
Merge pull request #8746 from syiming:add_multilevel_crop_and_resize
PiperOrigin-RevId: 322214979
parents
52515dc3
f7d74d68
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
178 additions
and
12 deletions
+178
-12
research/object_detection/builders/model_builder.py
research/object_detection/builders/model_builder.py
+4
-2
research/object_detection/meta_architectures/context_rcnn_meta_arch.py
...ct_detection/meta_architectures/context_rcnn_meta_arch.py
+1
-1
research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py
...ion/meta_architectures/context_rcnn_meta_arch_tf1_test.py
+4
-3
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
...ect_detection/meta_architectures/faster_rcnn_meta_arch.py
+10
-3
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
...tion/meta_architectures/faster_rcnn_meta_arch_test_lib.py
+4
-3
research/object_detection/utils/spatial_transform_ops.py
research/object_detection/utils/spatial_transform_ops.py
+94
-0
research/object_detection/utils/spatial_transform_ops_test.py
...arch/object_detection/utils/spatial_transform_ops_test.py
+61
-0
No files found.
research/object_detection/builders/model_builder.py
View file @
d3d2ad3d
...
...
@@ -39,6 +39,7 @@ from object_detection.protos import losses_pb2
from
object_detection.protos
import
model_pb2
from
object_detection.utils
import
label_map_util
from
object_detection.utils
import
ops
from
object_detection.utils
import
spatial_transform_ops
as
spatial_ops
from
object_detection.utils
import
tf_version
## Feature Extractors for TF
...
...
@@ -656,8 +657,9 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
second_stage_localization_loss_weight
)
crop_and_resize_fn
=
(
ops
.
matmul_crop_and_resize
if
frcnn_config
.
use_matmul_crop_and_resize
else
ops
.
native_crop_and_resize
)
spatial_ops
.
multilevel_matmul_crop_and_resize
if
frcnn_config
.
use_matmul_crop_and_resize
else
spatial_ops
.
multilevel_native_crop_and_resize
)
clip_anchors_to_image
=
(
frcnn_config
.
clip_anchors_to_image
)
...
...
research/object_detection/meta_architectures/context_rcnn_meta_arch.py
View file @
d3d2ad3d
...
...
@@ -324,7 +324,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
A float32 Tensor with shape [K, new_height, new_width, depth].
"""
box_features
=
self
.
_crop_and_resize_fn
(
features_to_crop
,
proposal_boxes_normalized
,
[
features_to_crop
]
,
proposal_boxes_normalized
,
None
,
[
self
.
_initial_crop_size
,
self
.
_initial_crop_size
])
attention_features
=
self
.
_context_feature_extract_fn
(
...
...
research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py
View file @
d3d2ad3d
...
...
@@ -41,7 +41,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from
object_detection.protos
import
box_predictor_pb2
from
object_detection.protos
import
hyperparams_pb2
from
object_detection.protos
import
post_processing_pb2
from
object_detection.utils
import
ops
from
object_detection.utils
import
spatial_transform_ops
as
spatial_
ops
from
object_detection.utils
import
test_case
from
object_detection.utils
import
test_utils
from
object_detection.utils
import
tf_version
...
...
@@ -363,8 +363,9 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
max_negatives_per_positive
=
None
)
crop_and_resize_fn
=
(
ops
.
matmul_crop_and_resize
if
use_matmul_crop_and_resize
else
ops
.
native_crop_and_resize
)
spatial_ops
.
multilevel_matmul_crop_and_resize
if
use_matmul_crop_and_resize
else
spatial_ops
.
multilevel_native_crop_and_resize
)
common_kwargs
=
{
'is_training'
:
is_training
,
...
...
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
View file @
d3d2ad3d
...
...
@@ -1948,9 +1948,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
Returns:
A float32 tensor with shape [K, new_height, new_width, depth].
"""
features_to_crop
=
[
features_to_crop
]
num_levels
=
len
(
features_to_crop
)
box_levels
=
None
if
num_levels
!=
1
:
# If there are multiple levels to select, get the box levels
box_levels
=
ops
.
fpn_feature_levels
(
num_levels
,
num_levels
-
1
,
1.0
/
224
,
proposal_boxes_normalized
)
cropped_regions
=
self
.
_flatten_first_two_dimensions
(
self
.
_crop_and_resize_fn
(
features_to_crop
,
proposal_boxes_normalized
,
features_to_crop
,
proposal_boxes_normalized
,
box_levels
,
[
self
.
_initial_crop_size
,
self
.
_initial_crop_size
]))
return
self
.
_maxpool_layer
(
cropped_regions
)
...
...
@@ -2517,8 +2524,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
image_shape
[
1
],
image_shape
[
2
],
check_range
=
False
).
get
()
flat_cropped_gt_mask
=
self
.
_crop_and_resize_fn
(
tf
.
expand_dims
(
flat_gt_masks
,
-
1
),
tf
.
expand_dims
(
flat_normalized_proposals
,
axis
=
1
),
[
tf
.
expand_dims
(
flat_gt_masks
,
-
1
)
]
,
tf
.
expand_dims
(
flat_normalized_proposals
,
axis
=
1
),
None
,
[
mask_height
,
mask_width
])
# Without stopping gradients into cropped groundtruth masks the
# performance with 100-padded groundtruth masks when batch size > 1 is
...
...
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
View file @
d3d2ad3d
...
...
@@ -34,7 +34,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from
object_detection.protos
import
box_predictor_pb2
from
object_detection.protos
import
hyperparams_pb2
from
object_detection.protos
import
post_processing_pb2
from
object_detection.utils
import
ops
from
object_detection.utils
import
spatial_transform_ops
as
spatial_
ops
from
object_detection.utils
import
test_case
from
object_detection.utils
import
test_utils
from
object_detection.utils
import
tf_version
...
...
@@ -377,8 +377,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
max_negatives_per_positive
=
None
)
crop_and_resize_fn
=
(
ops
.
matmul_crop_and_resize
if
use_matmul_crop_and_resize
else
ops
.
native_crop_and_resize
)
spatial_ops
.
multilevel_matmul_crop_and_resize
if
use_matmul_crop_and_resize
else
spatial_ops
.
multilevel_native_crop_and_resize
)
common_kwargs
=
{
'is_training'
:
is_training
,
...
...
research/object_detection/utils/spatial_transform_ops.py
View file @
d3d2ad3d
...
...
@@ -411,6 +411,56 @@ def multilevel_roi_align(features, boxes, box_levels, output_size,
return
features_per_box
def multilevel_native_crop_and_resize(images, boxes, box_levels,
                                      crop_size, scope=None):
  """Multilevel native crop and resize.

  Same as `multilevel_matmul_crop_and_resize` but uses
  tf.image.crop_and_resize.

  Args:
    images: A list of 4-D tensors of shape
      [batch, image_height, image_width, depth] representing features of
      different size.
    boxes: A `Tensor` of type `float32`.
      A 3-D tensor of shape `[batch, num_boxes, 4]`. The boxes are specified in
      normalized coordinates and are of the form `[y1, x1, y2, x2]`. A
      normalized coordinate value of `y` is mapped to the image coordinate at
      `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height coordinates.
      We do allow y1 > y2, in which case the sampled crop is an up-down flipped
      version of the original image. The width dimension is treated similarly.
      Normalized coordinates outside the `[0, 1]` range are allowed, in which
      case we use `extrapolation_value` to extrapolate the input image values.
    box_levels: A 2-D tensor of shape [batch, num_boxes] representing the level
      of the box, or None, in which case all boxes are cropped from the first
      (and presumably only) feature map in `images`.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    scope: A name for the operation (optional).

  Returns:
    A 5-D float tensor of shape `[batch, num_boxes, crop_height, crop_width,
    depth]`
  """
  if box_levels is None:
    # Single-level fast path: no per-level masking needed.
    return native_crop_and_resize(images[0], boxes, crop_size, scope)
  with tf.name_scope('MultiLevelNativeCropAndResize'):
    cropped_feature_list = []
    for level, image in enumerate(images):
      # For each level, crop the feature according to all boxes
      # set the cropped feature not at this level to 0 tensor.
      # Consider more efficient way of computing cropped features.
      cropped = native_crop_and_resize(image, boxes, crop_size, scope)
      # Broadcast the per-box level mask over the crop's trailing
      # (height * width * depth) elements; requires statically known shapes
      # (uses cropped.shape.as_list()) — TODO confirm callers always pass
      # fully defined feature shapes.
      cond = tf.tile(
          tf.equal(box_levels, level)[:, :, tf.newaxis],
          [1, 1] + [tf.math.reduce_prod(cropped.shape.as_list()[2:])])
      cond = tf.reshape(cond, cropped.shape)
      cropped_final = tf.where(cond, cropped, tf.zeros_like(cropped))
      cropped_feature_list.append(cropped_final)
    # Each box is non-zero at exactly one level, so summing across levels
    # selects the crop taken from the box's assigned level.
    return tf.math.reduce_sum(cropped_feature_list, axis=0)
def
native_crop_and_resize
(
image
,
boxes
,
crop_size
,
scope
=
None
):
"""Same as `matmul_crop_and_resize` but uses tf.image.crop_and_resize."""
def
get_box_inds
(
proposals
):
...
...
@@ -431,6 +481,50 @@ def native_crop_and_resize(image, boxes, crop_size, scope=None):
return
tf
.
reshape
(
cropped_regions
,
final_shape
)
def multilevel_matmul_crop_and_resize(images, boxes, box_levels, crop_size,
                                      extrapolation_value=0.0, scope=None):
  """Multilevel matmul crop and resize.

  Same as `matmul_crop_and_resize` but crop images according to box levels.

  Args:
    images: A list of 4-D tensor of shape
      [batch, image_height, image_width, depth] representing features of
      different size.
    boxes: A `Tensor` of type `float32` or 'bfloat16'.
      A 3-D tensor of shape `[batch, num_boxes, 4]`. The boxes are specified in
      normalized coordinates and are of the form `[y1, x1, y2, x2]`. A
      normalized coordinate value of `y` is mapped to the image coordinate at
      `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1] in image height coordinates.
      We do allow y1 > y2, in which case the sampled crop is an up-down flipped
      version of the original image. The width dimension is treated similarly.
      Normalized coordinates outside the `[0, 1]` range are allowed, in which
      case we use `extrapolation_value` to extrapolate the input image values.
    box_levels: A 2-D tensor of shape [batch, num_boxes] representing the level
      of the box.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    extrapolation_value: A float value to use for extrapolation.
    scope: A name for the operation (optional).

  Returns:
    A 5-D float tensor of shape `[batch, num_boxes, crop_height, crop_width,
    depth]`
  """
  with tf.name_scope(scope, 'MultiLevelMatMulCropAndResize'):
    # A missing level assignment means every box crops from level 0.
    levels = (tf.zeros(tf.shape(boxes)[:2], dtype=tf.int32)
              if box_levels is None else box_levels)
    return multilevel_roi_align(images, boxes, levels, crop_size,
                                align_corners=True,
                                extrapolation_value=extrapolation_value)
def
matmul_crop_and_resize
(
image
,
boxes
,
crop_size
,
extrapolation_value
=
0.0
,
scope
=
None
):
"""Matrix multiplication based implementation of the crop and resize op.
...
...
research/object_detection/utils/spatial_transform_ops_test.py
View file @
d3d2ad3d
...
...
@@ -512,6 +512,38 @@ class MatMulCropAndResizeTest(test_case.TestCase):
crop_output
=
self
.
execute
(
graph_fn
,
[
image
,
boxes
])
self
.
assertAllClose
(
crop_output
,
expected_output
)
def testMultilevelMatMulCropAndResize(self):
  """Crops 2x2 patches from two feature levels with matmul crop-and-resize.

  Two boxes per batch element; `box_levels` routes each box to one of the
  two 3x3, 2-channel feature maps. Boxes with y1 > y2 / x1 > x2 exercise the
  documented flipped-crop behavior.
  """
  def graph_fn(image1, image2, boxes, box_levels):
    return spatial_ops.multilevel_matmul_crop_and_resize([image1, image2],
                                                         boxes,
                                                         box_levels,
                                                         crop_size=[2, 2])

  # Level 0: second channel is all zeros; level 1: second channel is
  # (first channel - 1), which makes level mixups visible in the output.
  image = [np.array([[[[1, 0], [2, 0], [3, 0]],
                      [[4, 0], [5, 0], [6, 0]],
                      [[7, 0], [8, 0], [9, 0]]],
                     [[[1, 0], [2, 0], [3, 0]],
                      [[4, 0], [5, 0], [6, 0]],
                      [[7, 0], [8, 0], [9, 0]]]], dtype=np.float32),
           np.array([[[[1, 0], [2, 1], [3, 2]],
                      [[4, 3], [5, 4], [6, 5]],
                      [[7, 6], [8, 7], [9, 8]]],
                     [[[1, 0], [2, 1], [3, 2]],
                      [[4, 3], [5, 4], [6, 5]],
                      [[7, 6], [8, 7], [9, 8]]]], dtype=np.float32)]
  boxes = np.array([[[1, 1, 0, 0],
                     [.5, .5, 0, 0]],
                    [[0, 0, 1, 1],
                     [0, 0, .5, .5]]], dtype=np.float32)
  box_levels = np.array([[0, 1],
                         [1, 1]], dtype=np.int32)
  expected_output = [[[[[9, 0], [7, 0]],
                       [[3, 0], [1, 0]]],
                      [[[5, 4], [4, 3]],
                       [[2, 1], [1, 0]]]],
                     [[[[1, 0], [3, 2]],
                       [[7, 6], [9, 8]]],
                      [[[1, 0], [2, 1]],
                       [[4, 3], [5, 4]]]]]
  crop_output = self.execute(graph_fn, image + [boxes, box_levels])
  self.assertAllClose(crop_output, expected_output)
class
NativeCropAndResizeTest
(
test_case
.
TestCase
):
...
...
@@ -537,6 +569,35 @@ class NativeCropAndResizeTest(test_case.TestCase):
crop_output
=
self
.
execute_cpu
(
graph_fn
,
[
image
,
boxes
])
self
.
assertAllClose
(
crop_output
,
expected_output
)
def testMultilevelBatchCropAndResize3x3To2x2_2Channels(self):
  """Crops 2x2 patches from two feature levels with native crop-and-resize.

  Level 0 is a 3x3 feature map, level 1 a 2x2 feature map, both 2-channel;
  `box_levels` routes each box to one of them. Flipped boxes (y1 > y2)
  exercise the up-down/left-right flip behavior.
  """
  def graph_fn(image1, image2, boxes, box_levels):
    return spatial_ops.multilevel_native_crop_and_resize(
        [image1, image2], boxes, box_levels, crop_size=[2, 2])

  image = [np.array([[[[1, 0], [2, 1], [3, 2]],
                      [[4, 3], [5, 4], [6, 5]],
                      [[7, 6], [8, 7], [9, 8]]],
                     [[[1, 0], [2, 1], [3, 2]],
                      [[4, 3], [5, 4], [6, 5]],
                      [[7, 6], [8, 7], [9, 8]]]], dtype=np.float32),
           np.array([[[[1, 0], [2, 1]],
                      [[4, 3], [5, 4]]],
                     [[[1, 0], [2, 1]],
                      [[4, 3], [5, 4]]]], dtype=np.float32)]
  boxes = np.array([[[0, 0, 1, 1],
                     [0, 0, .5, .5]],
                    [[1, 1, 0, 0],
                     [.5, .5, 0, 0]]], dtype=np.float32)
  # Fix: levels were declared float32, which contradicts the documented
  # integer level contract and the int32 dtype used by the matmul test; the
  # float dtype only worked because tf.equal tolerates the mismatch.
  box_levels = np.array([[0, 1],
                         [0, 0]], dtype=np.int32)
  expected_output = [[[[[1, 0], [3, 2]],
                       [[7, 6], [9, 8]]],
                      [[[1, 0], [1.5, 0.5]],
                       [[2.5, 1.5], [3, 2]]]],
                     [[[[9, 8], [7, 6]],
                       [[3, 2], [1, 0]]],
                      [[[5, 4], [4, 3]],
                       [[2, 1], [1, 0]]]]]
  crop_output = self.execute_cpu(graph_fn, image + [boxes, box_levels])
  self.assertAllClose(crop_output, expected_output)
# Script entry point: discover and run all test cases in this module.
if __name__ == '__main__':
  tf.test.main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment