ModelZoo / ResNet50_tensorflow · Commits

Commit a04d9e0e, authored Jun 14, 2021 by Vishnu Banna (merge commit; parents 64f16d61, bcbce005).

Showing 20 changed files with 586 additions and 155 deletions (+586, −155).
research/object_detection/core/preprocessor.py (+29, −2)
research/object_detection/core/preprocessor_test.py (+40, −3)
research/object_detection/core/standard_fields.py (+4, −0)
research/object_detection/core/target_assigner.py (+58, −14)
research/object_detection/core/target_assigner_test.py (+42, −14)
research/object_detection/data_decoders/tf_example_decoder.py (+24, −0)
research/object_detection/data_decoders/tf_example_decoder_test.py (+68, −0)
research/object_detection/g3doc/running_on_mobile_tf2.md (+53, −25)
research/object_detection/inputs.py (+11, −0)
research/object_detection/inputs_test.py (+8, −3)
research/object_detection/meta_architectures/center_net_meta_arch.py (+58, −13)
research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py (+3, −1)
research/object_detection/model_lib.py (+6, −0)
research/object_detection/model_lib_v2.py (+69, −51)
research/object_detection/model_main_tf2.py (+4, −2)
research/object_detection/models/keras_models/resnet_v1.py (+2, −1)
research/object_detection/protos/center_net.proto (+12, −0)
research/object_detection/utils/spatial_transform_ops.py (+2, −1)
research/object_detection/utils/target_assigner_utils.py (+64, −19)
research/object_detection/utils/target_assigner_utils_test.py (+29, −6)
research/object_detection/core/preprocessor.py

@@ -1414,6 +1414,7 @@ def _strict_random_crop_image(image,
                               label_confidences=None,
                               multiclass_scores=None,
                               masks=None,
+                              mask_weights=None,
                               keypoints=None,
                               keypoint_visibilities=None,
                               densepose_num_points=None,
@@ -1451,6 +1452,8 @@ def _strict_random_crop_image(image,
     masks: (optional) rank 3 float32 tensor with shape
            [num_instances, height, width] containing instance masks. The masks
            are of the same height, width as the input `image`.
+    mask_weights: (optional) rank 1 float32 tensor with shape [num_instances]
+           containing instance mask weights.
     keypoints: (optional) rank 3 float32 tensor with shape
            [num_instances, num_keypoints, 2]. The keypoints are in y-x
            normalized coordinates.
@@ -1488,7 +1491,7 @@ def _strict_random_crop_image(image,
     Boxes are in normalized form.
     labels: new labels.

-    If label_weights, multiclass_scores, masks, keypoints,
+    If label_weights, multiclass_scores, masks, mask_weights, keypoints,
     keypoint_visibilities, densepose_num_points, densepose_part_ids, or
     densepose_surface_coords is not None, the function also returns:
     label_weights: rank 1 float32 tensor with shape [num_instances].
@@ -1496,6 +1499,8 @@ def _strict_random_crop_image(image,
       [num_instances, num_classes]
     masks: rank 3 float32 tensor with shape [num_instances, height, width]
       containing instance masks.
+    mask_weights: rank 1 float32 tensor with shape [num_instances] with mask
+      weights.
     keypoints: rank 3 float32 tensor with shape
       [num_instances, num_keypoints, 2]
     keypoint_visibilities: rank 2 bool tensor with shape
@@ -1605,6 +1610,12 @@ def _strict_random_crop_image(image,
               0]:im_box_end[0], im_box_begin[1]:im_box_end[1]]
       result.append(new_masks)

+    if mask_weights is not None:
+      mask_weights_inside_window = tf.gather(mask_weights, inside_window_ids)
+      mask_weights_completely_inside_window = tf.gather(
+          mask_weights_inside_window, keep_ids)
+      result.append(mask_weights_completely_inside_window)
+
     if keypoints is not None:
       keypoints_of_boxes_inside_window = tf.gather(keypoints,
                                                    inside_window_ids)
       keypoints_of_boxes_completely_inside_window = tf.gather(
@@ -1654,6 +1665,7 @@ def random_crop_image(image,
                       label_confidences=None,
                       multiclass_scores=None,
                       masks=None,
+                      mask_weights=None,
                       keypoints=None,
                       keypoint_visibilities=None,
                       densepose_num_points=None,
@@ -1701,6 +1713,8 @@ def random_crop_image(image,
     masks: (optional) rank 3 float32 tensor with shape
            [num_instances, height, width] containing instance masks. The masks
            are of the same height, width as the input `image`.
+    mask_weights: (optional) rank 1 float32 tensor with shape [num_instances]
+           containing weights for each instance mask.
     keypoints: (optional) rank 3 float32 tensor with shape
            [num_instances, num_keypoints, 2]. The keypoints are in y-x
            normalized coordinates.
@@ -1751,6 +1765,7 @@ def random_crop_image(image,
       [num_instances, num_classes]
     masks: rank 3 float32 tensor with shape [num_instances, height, width]
       containing instance masks.
+    mask_weights: rank 1 float32 tensor with shape [num_instances].
     keypoints: rank 3 float32 tensor with shape
       [num_instances, num_keypoints, 2]
     keypoint_visibilities: rank 2 bool tensor with shape
@@ -1771,6 +1786,7 @@ def random_crop_image(image,
         label_confidences=label_confidences,
         multiclass_scores=multiclass_scores,
         masks=masks,
+        mask_weights=mask_weights,
         keypoints=keypoints,
         keypoint_visibilities=keypoint_visibilities,
         densepose_num_points=densepose_num_points,
@@ -1803,6 +1819,8 @@ def random_crop_image(image,
     outputs.append(multiclass_scores)
   if masks is not None:
     outputs.append(masks)
+  if mask_weights is not None:
+    outputs.append(mask_weights)
   if keypoints is not None:
     outputs.append(keypoints)
   if keypoint_visibilities is not None:
@@ -4388,6 +4406,7 @@ def get_default_func_arg_map(include_label_weights=True,
                              include_label_confidences=False,
                              include_multiclass_scores=False,
                              include_instance_masks=False,
+                             include_instance_mask_weights=False,
                              include_keypoints=False,
                              include_keypoint_visibilities=False,
                              include_dense_pose=False,
@@ -4403,6 +4422,8 @@ def get_default_func_arg_map(include_label_weights=True,
       multiclass scores, too.
     include_instance_masks: If True, preprocessing functions will modify the
       instance masks, too.
+    include_instance_mask_weights: If True, preprocessing functions will modify
+      the instance mask weights.
     include_keypoints: If True, preprocessing functions will modify the
       keypoints, too.
     include_keypoint_visibilities: If True, preprocessing functions will modify
@@ -4434,6 +4455,11 @@ def get_default_func_arg_map(include_label_weights=True,
     groundtruth_instance_masks = (
        fields.InputDataFields.groundtruth_instance_masks)
+  groundtruth_instance_mask_weights = None
+  if include_instance_mask_weights:
+    groundtruth_instance_mask_weights = (
+        fields.InputDataFields.groundtruth_instance_mask_weights)
   groundtruth_keypoints = None
   if include_keypoints:
     groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints
@@ -4503,7 +4529,8 @@ def get_default_func_arg_map(include_label_weights=True,
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
          groundtruth_label_weights, groundtruth_label_confidences,
-         multiclass_scores, groundtruth_instance_masks, groundtruth_keypoints,
+         multiclass_scores, groundtruth_instance_masks,
+         groundtruth_instance_mask_weights, groundtruth_keypoints,
          groundtruth_keypoint_visibilities, groundtruth_dp_num_points,
          groundtruth_dp_part_ids, groundtruth_dp_surface_coords),
     random_pad_image:
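Taken together, these changes let per-instance mask weights ride through crop-style augmentations. A minimal usage sketch (not part of the commit; tensor shapes and values are illustrative assumptions):

```python
# Hypothetical wiring sketch: route per-instance mask weights through
# random_crop_image via the default function-argument map.
import tensorflow.compat.v1 as tf
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields

tensor_dict = {
    fields.InputDataFields.image:
        tf.zeros([1, 200, 400, 3], dtype=tf.float32),
    fields.InputDataFields.groundtruth_boxes:
        tf.constant([[0.1, 0.1, 0.8, 0.9]], dtype=tf.float32),
    fields.InputDataFields.groundtruth_classes:
        tf.constant([1], dtype=tf.int32),
    fields.InputDataFields.groundtruth_weights:
        tf.constant([1.0], dtype=tf.float32),
    fields.InputDataFields.groundtruth_instance_masks:
        tf.zeros([1, 200, 400], dtype=tf.float32),
    # New field introduced by this commit.
    fields.InputDataFields.groundtruth_instance_mask_weights:
        tf.constant([1.0], dtype=tf.float32),
}
arg_map = preprocessor.get_default_func_arg_map(
    include_instance_masks=True,
    include_instance_mask_weights=True)
augmented = preprocessor.preprocess(
    tensor_dict, [(preprocessor.random_crop_image, {})],
    func_arg_map=arg_map)
```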
research/object_detection/core/preprocessor_test.py

@@ -1894,6 +1894,37 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
     self.assertAllClose(new_boxes.flatten(), expected_boxes.flatten())

+  def testStrictRandomCropImageWithMaskWeights(self):
+    def graph_fn():
+      image = self.createColorfulTestImage()[0]
+      boxes = self.createTestBoxes()
+      labels = self.createTestLabels()
+      weights = self.createTestGroundtruthWeights()
+      masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+      mask_weights = tf.constant([1.0, 0.0], dtype=tf.float32)
+      with mock.patch.object(
+          tf.image,
+          'sample_distorted_bounding_box'
+      ) as mock_sample_distorted_bounding_box:
+        mock_sample_distorted_bounding_box.return_value = (
+            tf.constant([6, 143, 0], dtype=tf.int32),
+            tf.constant([190, 237, -1], dtype=tf.int32),
+            tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+        results = preprocessor._strict_random_crop_image(
+            image, boxes, labels, weights, masks=masks,
+            mask_weights=mask_weights)
+        return results
+    (new_image, new_boxes, _, _, new_masks,
+     new_mask_weights) = self.execute_cpu(graph_fn, [])
+    expected_boxes = np.array(
+        [[0.0, 0.0, 0.75789469, 1.0],
+         [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32)
+    self.assertAllEqual(new_image.shape, [190, 237, 3])
+    self.assertAllEqual(new_masks.shape, [2, 190, 237])
+    self.assertAllClose(new_mask_weights, [1.0, 0.0])
+    self.assertAllClose(new_boxes.flatten(), expected_boxes.flatten())
+
   def testStrictRandomCropImageWithKeypoints(self):
     def graph_fn():
       image = self.createColorfulTestImage()[0]
@@ -1947,6 +1978,7 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
       labels = self.createTestLabels()
       weights = self.createTestGroundtruthWeights()
       masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+      mask_weights = tf.constant([1.0, 0.0], dtype=tf.float32)
       tensor_dict = {
           fields.InputDataFields.image: image,
@@ -1954,10 +1986,12 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
           fields.InputDataFields.groundtruth_classes: labels,
           fields.InputDataFields.groundtruth_weights: weights,
-          fields.InputDataFields.groundtruth_instance_masks: masks
+          fields.InputDataFields.groundtruth_instance_masks: masks,
+          fields.InputDataFields.groundtruth_instance_mask_weights:
+              mask_weights
       }
       preprocessor_arg_map = preprocessor.get_default_func_arg_map(
-          include_instance_masks=True)
+          include_instance_masks=True,
+          include_instance_mask_weights=True)
       preprocessing_options = [(preprocessor.random_crop_image, {})]
@@ -1980,16 +2014,19 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
       distorted_labels = distorted_tensor_dict[
           fields.InputDataFields.groundtruth_classes]
       distorted_masks = distorted_tensor_dict[
           fields.InputDataFields.groundtruth_instance_masks]
+      distorted_mask_weights = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_instance_mask_weights]
       return [distorted_image, distorted_boxes, distorted_labels,
-              distorted_masks]
-    (distorted_image_, distorted_boxes_, distorted_labels_,
-     distorted_masks_) = self.execute_cpu(graph_fn, [])
+              distorted_masks, distorted_mask_weights]
+    (distorted_image_, distorted_boxes_, distorted_labels_,
+     distorted_masks_, distorted_mask_weights_) = self.execute_cpu(
+         graph_fn, [])
     expected_boxes = np.array([
         [0.0, 0.0, 0.75789469, 1.0],
         [0.23157893, 0.24050637, 0.75789469, 1.0],
     ], dtype=np.float32)
     self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
     self.assertAllEqual(distorted_masks_.shape, [2, 190, 237])
+    self.assertAllClose(distorted_mask_weights_, [1.0, 0.0])
     self.assertAllEqual(distorted_labels_, [1, 2])
     self.assertAllClose(
         distorted_boxes_.flatten(), expected_boxes.flatten())
research/object_detection/core/standard_fields.py

@@ -64,6 +64,7 @@ class InputDataFields(object):
     proposal_boxes: coordinates of object proposal boxes.
     proposal_objectness: objectness score of each proposal.
     groundtruth_instance_masks: ground truth instance masks.
+    groundtruth_instance_mask_weights: ground truth instance mask weights.
     groundtruth_instance_boundaries: ground truth instance boundaries.
     groundtruth_instance_classes: instance mask-level class labels.
     groundtruth_keypoints: ground truth keypoints.
@@ -122,6 +123,7 @@ class InputDataFields(object):
   proposal_boxes = 'proposal_boxes'
   proposal_objectness = 'proposal_objectness'
   groundtruth_instance_masks = 'groundtruth_instance_masks'
+  groundtruth_instance_mask_weights = 'groundtruth_instance_mask_weights'
   groundtruth_instance_boundaries = 'groundtruth_instance_boundaries'
   groundtruth_instance_classes = 'groundtruth_instance_classes'
   groundtruth_keypoints = 'groundtruth_keypoints'
@@ -208,6 +210,7 @@ class BoxListFields(object):
     weights: sample weights per bounding box.
     objectness: objectness score per bounding box.
     masks: masks per bounding box.
+    mask_weights: mask weights for each bounding box.
     boundaries: boundaries per bounding box.
     keypoints: keypoints per bounding box.
     keypoint_visibilities: keypoint visibilities per bounding box.
@@ -228,6 +231,7 @@ class BoxListFields(object):
   confidences = 'confidences'
   objectness = 'objectness'
   masks = 'masks'
+  mask_weights = 'mask_weights'
   boundaries = 'boundaries'
   keypoints = 'keypoints'
   keypoint_visibilities = 'keypoint_visibilities'
research/object_detection/core/target_assigner.py

@@ -1409,8 +1409,10 @@ class CenterNetKeypointTargetAssigner(object):
         [batch_size, num_keypoints] representing number of instances for each
         keypoint type.
-      valid_mask: A float tensor with shape [batch_size, output_height,
-        output_width] where all values within the regions of the blackout boxes
-        are 0.0 and 1.0 elsewhere.
+      valid_mask: A float tensor with shape [batch_size, output_height,
+        output_width, num_keypoints] where all values within the regions of the
+        blackout boxes are 0.0 and 1.0 elsewhere. Note that the blackout boxes
+        are per keypoint type and are blacked out if the keypoint
+        visibility/weight (of the corresponding keypoint type) is zero.
     """
     out_width = tf.cast(tf.maximum(width // self._stride, 1), tf.float32)
     out_height = tf.cast(tf.maximum(height // self._stride, 1), tf.float32)
@@ -1480,13 +1482,17 @@ class CenterNetKeypointTargetAssigner(object):
       keypoint_std_dev = keypoint_std_dev * tf.stack(
           [sigma] * num_keypoints, axis=1)

-      # Generate the valid region mask to ignore regions with target class but
-      # no corresponding keypoints.
-      # Shape: [num_instances].
-      blackout = tf.logical_and(classes[:, self._class_id] > 0,
-                                tf.reduce_max(kp_weights, axis=1) < 1e-3)
-      valid_mask = ta_utils.blackout_pixel_weights_by_box_regions(
-          out_height, out_width, boxes.get(), blackout)
+      # Generate the per-keypoint type valid region mask to ignore regions
+      # with keypoint weights equal to zeros (e.g. visibility is 0).
+      # shape of valid_mask: [out_height, out_width, num_keypoints]
+      kp_weight_list = tf.unstack(kp_weights, axis=1)
+      valid_mask_channel_list = []
+      for kp_weight in kp_weight_list:
+        blackout = kp_weight < 1e-3
+        valid_mask_channel_list.append(
+            ta_utils.blackout_pixel_weights_by_box_regions(
+                out_height, out_width, boxes.get(), blackout))
+      valid_mask = tf.stack(valid_mask_channel_list, axis=2)
       valid_mask_list.append(valid_mask)

     # Apply the Gaussian kernel to the keypoint coordinates. Returned heatmap
@@ -2001,8 +2007,8 @@ class CenterNetMaskTargetAssigner(object):
     self._stride = stride

-  def assign_segmentation_targets(
-      self, gt_masks_list, gt_classes_list,
-      mask_resize_method=ResizeMethod.BILINEAR):
+  def assign_segmentation_targets(
+      self, gt_masks_list, gt_classes_list, gt_boxes_list=None,
+      gt_mask_weights_list=None, mask_resize_method=ResizeMethod.BILINEAR):
     """Computes the segmentation targets.

     This utility produces a semantic segmentation mask for each class, starting
@@ -2016,15 +2022,25 @@ class CenterNetMaskTargetAssigner(object):
       gt_classes_list: A list of float tensors with shape [num_boxes,
         num_classes] representing the one-hot encoded class labels for each box
         in the gt_boxes_list.
+      gt_boxes_list: An optional list of float tensors with shape
+        [num_boxes, 4] with normalized boxes corresponding to each mask. The
+        boxes are used to spatially allocate mask weights.
+      gt_mask_weights_list: An optional list of float tensors with shape
+        [num_boxes] with weights for each mask. If a mask has a zero weight, it
+        indicates that the box region associated with the mask should not
+        contribute to the loss. If not provided, will use a per-pixel weight of
+        1.
       mask_resize_method: A `tf.compat.v2.image.ResizeMethod`. The method to
         use when resizing masks from input resolution to output resolution.

     Returns:
       segmentation_targets: An int32 tensor of size [batch_size, output_height,
         output_width, num_classes] representing the class of each location in
         the output space.
+      segmentation_weight: A float32 tensor of size [batch_size, output_height,
+        output_width] indicating the loss weight to apply at each location.
     """
-    # TODO(ronnyvotel): Handle groundtruth weights.
     _, num_classes = shape_utils.combined_static_and_dynamic_shape(
         gt_classes_list[0])
@@ -2033,8 +2049,35 @@ class CenterNetMaskTargetAssigner(object):
     output_height = tf.maximum(input_height // self._stride, 1)
     output_width = tf.maximum(input_width // self._stride, 1)

+    if gt_boxes_list is None:
+      gt_boxes_list = [None] * len(gt_masks_list)
+    if gt_mask_weights_list is None:
+      gt_mask_weights_list = [None] * len(gt_masks_list)
+
     segmentation_targets_list = []
-    for gt_masks, gt_classes in zip(gt_masks_list, gt_classes_list):
+    segmentation_weights_list = []
+    for gt_boxes, gt_masks, gt_mask_weights, gt_classes in zip(
+        gt_boxes_list, gt_masks_list, gt_mask_weights_list, gt_classes_list):
+      if gt_boxes is not None and gt_mask_weights is not None:
+        boxes = box_list.BoxList(gt_boxes)
+        # Convert the box coordinates to absolute output image dimension space.
+        boxes_absolute = box_list_ops.to_absolute_coordinates(
+            boxes, output_height, output_width)
+        # Generate a segmentation weight that applies mask weights in object
+        # regions.
+        blackout = gt_mask_weights <= 0
+        segmentation_weight_for_image = (
+            ta_utils.blackout_pixel_weights_by_box_regions(
+                output_height, output_width, boxes_absolute.get(), blackout,
+                weights=gt_mask_weights))
+        segmentation_weights_list.append(segmentation_weight_for_image)
+      else:
+        segmentation_weights_list.append(
+            tf.ones((output_height, output_width), dtype=tf.float32))
       gt_masks = _resize_masks(gt_masks, output_height, output_width,
                                mask_resize_method)
       gt_masks = gt_masks[:, :, :, tf.newaxis]
@@ -2047,7 +2090,8 @@ class CenterNetMaskTargetAssigner(object):
       segmentation_targets_list.append(segmentations_for_image)

     segmentation_target = tf.stack(segmentation_targets_list, axis=0)
-    return segmentation_target
+    segmentation_weight = tf.stack(segmentation_weights_list, axis=0)
+    return segmentation_target, segmentation_weight


 class CenterNetDensePoseTargetAssigner(object):
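A sketch of the updated API (not from the commit; tensor values are illustrative): with boxes and mask weights supplied, the assigner now also returns a per-pixel loss-weight map.

```python
import tensorflow.compat.v1 as tf
from object_detection.core import target_assigner

# One image, two instances; the first mask has weight 0, so its box
# region is blacked out in the returned segmentation weight.
gt_masks_list = [tf.round(tf.random.uniform([2, 8, 8]))]
gt_classes_list = [tf.constant([[0., 1., 0.],
                                [1., 0., 0.]], dtype=tf.float32)]
gt_boxes_list = [tf.constant([[0., 0., .5, .5],
                              [.5, .5, 1., 1.]], dtype=tf.float32)]
gt_mask_weights_list = [tf.constant([0.0, 1.0], dtype=tf.float32)]

assigner = target_assigner.CenterNetMaskTargetAssigner(stride=2)
segmentation_target, segmentation_weight = (
    assigner.assign_segmentation_targets(
        gt_masks_list=gt_masks_list,
        gt_classes_list=gt_classes_list,
        gt_boxes_list=gt_boxes_list,
        gt_mask_weights_list=gt_mask_weights_list))
# segmentation_weight: [1, 4, 4] float32, zeros inside the first box.
```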
research/object_detection/core/target_assigner_test.py

@@ -1699,7 +1699,7 @@ class CenterNetKeypointTargetAssignerTest(test_case.TestCase):
         tf.constant(
             np.array([[0.0, 0.0, 0.3, 0.3],
                       [0.0, 0.0, 0.5, 0.5],
                       [0.0, 0.0, 0.5, 0.5],
-                      [0.0, 0.0, 1.0, 1.0]]),
+                      [0.5, 0.5, 1.0, 1.0]]),
             dtype=tf.float32)
     ]
@@ -1728,15 +1728,20 @@ class CenterNetKeypointTargetAssignerTest(test_case.TestCase):
     # Verify the number of instances is correct.
     np.testing.assert_array_almost_equal([[0, 1]], num_instances_batch)
-    self.assertAllEqual([1, 30, 20], valid_mask.shape)
+    self.assertAllEqual([1, 30, 20, 2], valid_mask.shape)
     # When calling the function, we specify the class id to be 1 (1st and 3rd
     # instance) and the keypoint indices to be [0, 2], meaning that the 1st
     # instance is the target class with no valid keypoints in it. As a result,
-    # the region of the 1st instance bounding box should be blacked out
-    # (0.0, 0.0, 0.5, 0.5), transferring to (0, 0, 15, 10) in absolute output
-    # space.
-    self.assertAlmostEqual(np.sum(valid_mask[:, 0:16, 0:11]), 0.0)
-    # All other values are 1.0 so the sum is: 30 * 20 - 16 * 11 = 424.
-    self.assertAlmostEqual(np.sum(valid_mask), 424.0)
+    # the region of both keypoint types of the 1st instance bounding box should
+    # be blacked out (0.0, 0.0, 0.5, 0.5), transferring to (0, 0, 15, 10) in
+    # absolute output space.
+    self.assertAlmostEqual(np.sum(valid_mask[:, 0:15, 0:10, 0:2]), 0.0)
+    # For the 2nd instance, only the 1st keypoint has visibility of 0 so only
+    # the corresponding valid mask contains zeros.
+    self.assertAlmostEqual(np.sum(valid_mask[:, 15:30, 10:20, 0]), 0.0)
+    # All other values are 1.0 so the sum is:
+    # 30 * 20 * 2 - 15 * 10 * 2 - 15 * 10 * 1 = 750.
+    self.assertAlmostEqual(np.sum(valid_mask), 750.0)

   def test_assign_keypoints_offset_targets(self):
     def graph_fn():
@@ -2090,13 +2095,31 @@ class CenterNetMaskTargetAssignerTest(test_case.TestCase):
           tf.constant([[0., 1., 0.],
                        [0., 1., 0.]], dtype=tf.float32)
       ]
+      gt_boxes_list = [
+          # Example 0.
+          tf.constant([[0.0, 0.0, 0.5, 0.5],
+                       [0.0, 0.5, 0.5, 1.0],
+                       [0.0, 0.0, 1.0, 1.0]], dtype=tf.float32),
+          # Example 1.
+          tf.constant([[0.0, 0.0, 1.0, 1.0],
+                       [0.5, 0.0, 1.0, 0.5]], dtype=tf.float32)
+      ]
+      gt_mask_weights_list = [
+          # Example 0.
+          tf.constant([0.0, 1.0, 1.0], dtype=tf.float32),
+          # Example 1.
+          tf.constant([1.0, 1.0], dtype=tf.float32)
+      ]
       cn_assigner = targetassigner.CenterNetMaskTargetAssigner(stride=2)
-      segmentation_target = cn_assigner.assign_segmentation_targets(
-          gt_masks_list=gt_masks_list,
-          gt_classes_list=gt_classes_list,
-          mask_resize_method=targetassigner.ResizeMethod.NEAREST_NEIGHBOR)
-      return segmentation_target
-    segmentation_target = self.execute(graph_fn, [])
+      segmentation_target, segmentation_weight = (
+          cn_assigner.assign_segmentation_targets(
+              gt_masks_list=gt_masks_list,
+              gt_classes_list=gt_classes_list,
+              gt_boxes_list=gt_boxes_list,
+              gt_mask_weights_list=gt_mask_weights_list,
+              mask_resize_method=targetassigner.ResizeMethod
+              .NEAREST_NEIGHBOR))
+      return segmentation_target, segmentation_weight
+    segmentation_target, segmentation_weight = self.execute(graph_fn, [])

     expected_seg_target = np.array([
         # Example 0 [[class 0, class 1], [background, class 0]]
@@ -2108,13 +2131,18 @@ class CenterNetMaskTargetAssignerTest(test_case.TestCase):
     ], dtype=np.float32)
     np.testing.assert_array_almost_equal(
         expected_seg_target, segmentation_target)
+    expected_seg_weight = np.array([
+        [[0, 1], [1, 1]],
+        [[1, 1], [1, 1]]], dtype=np.float32)
+    np.testing.assert_array_almost_equal(
+        expected_seg_weight, segmentation_weight)

   def test_assign_segmentation_targets_no_objects(self):
     def graph_fn():
       gt_masks_list = [tf.zeros((0, 5, 5))]
       gt_classes_list = [tf.zeros((0, 10))]
       cn_assigner = targetassigner.CenterNetMaskTargetAssigner(stride=1)
-      segmentation_target = cn_assigner.assign_segmentation_targets(
+      segmentation_target, _ = cn_assigner.assign_segmentation_targets(
           gt_masks_list=gt_masks_list,
           gt_classes_list=gt_classes_list,
           mask_resize_method=targetassigner.ResizeMethod.NEAREST_NEIGHBOR)
research/object_detection/data_decoders/tf_example_decoder.py

@@ -373,6 +373,11 @@ class TfExampleDecoder(data_decoder.DataDecoder):
               self._decode_png_instance_masks))
     else:
       raise ValueError('Did not recognize the `instance_mask_type` option.')
+    self.keys_to_features['image/object/mask/weight'] = (
+        tf.VarLenFeature(tf.float32))
+    self.items_to_handlers[
+        fields.InputDataFields.groundtruth_instance_mask_weights] = (
+            slim_example_decoder.Tensor('image/object/mask/weight'))
     if load_dense_pose:
       self.keys_to_features['image/object/densepose/num'] = (
           tf.VarLenFeature(tf.int64))
@@ -491,6 +496,10 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         tensor of shape [None, num_keypoints] containing keypoint
         visibilities.
       fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
         shape [None, None, None] containing instance masks.
+      fields.InputDataFields.groundtruth_instance_mask_weights - 1D float32
+        tensor of shape [None] containing weights. These are typically values
+        in {0.0, 1.0} which indicate whether to consider the mask related to an
+        object.
       fields.InputDataFields.groundtruth_image_classes - 1D int64 of shape
         [None] containing classes for the boxes.
       fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
@@ -531,6 +540,21 @@ class TfExampleDecoder(data_decoder.DataDecoder):
             0),
         lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
         default_groundtruth_weights)

+    if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
+      gt_instance_masks = tensor_dict[
+          fields.InputDataFields.groundtruth_instance_masks]
+      num_gt_instance_masks = tf.shape(gt_instance_masks)[0]
+      gt_instance_mask_weights = tensor_dict[
+          fields.InputDataFields.groundtruth_instance_mask_weights]
+      num_gt_instance_mask_weights = tf.shape(gt_instance_mask_weights)[0]
+      def default_groundtruth_instance_mask_weights():
+        return tf.ones([num_gt_instance_masks], dtype=tf.float32)
+      tensor_dict[fields.InputDataFields.groundtruth_instance_mask_weights] = (
+          tf.cond(tf.greater(num_gt_instance_mask_weights, 0),
+                  lambda: gt_instance_mask_weights,
+                  default_groundtruth_instance_mask_weights))
+
     if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
       # Set all keypoints that are not labeled to NaN.
       gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
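A sketch modeled on the new test of how a tf.Example can carry the `image/object/mask/weight` feature; `encoded_jpeg` is assumed to hold JPEG bytes of a 4x4 image and is not defined here.

```python
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
from object_detection.utils import dataset_util

masks = np.random.randint(2, size=(2, 4, 4)).astype(np.float32)
example = tf.train.Example(features=tf.train.Features(feature={
    'image/encoded': dataset_util.bytes_feature(encoded_jpeg),  # assumed
    'image/format': dataset_util.bytes_feature(b'jpeg'),
    'image/height': dataset_util.int64_feature(4),
    'image/width': dataset_util.int64_feature(4),
    'image/object/mask': dataset_util.float_list_feature(
        masks.ravel().tolist()),
    # A weight of 0.0 excludes the second mask's box region from the loss.
    'image/object/mask/weight': dataset_util.float_list_feature([1.0, 0.0]),
}))
decoder = tf_example_decoder.TfExampleDecoder(load_instance_masks=True)
tensors = decoder.decode(tf.convert_to_tensor(example.SerializeToString()))
mask_weights = tensors[
    fields.InputDataFields.groundtruth_instance_mask_weights]
```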
research/object_detection/data_decoders/tf_example_decoder_test.py

@@ -1225,6 +1225,9 @@ class TfExampleDecoderTest(test_case.TestCase):
     self.assertAllEqual(
         instance_masks.astype(np.float32),
         tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
+    self.assertAllEqual(
+        tensor_dict[fields.InputDataFields.groundtruth_instance_mask_weights],
+        [1, 1, 1, 1])
     self.assertAllEqual(
         object_classes,
         tensor_dict[fields.InputDataFields.groundtruth_classes])
@@ -1272,6 +1275,71 @@ class TfExampleDecoderTest(test_case.TestCase):
     self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
                      tensor_dict)

+  def testDecodeInstanceSegmentationWithWeights(self):
+    num_instances = 4
+    image_height = 5
+    image_width = 3
+    # Randomly generate image.
+    image_tensor = np.random.randint(
+        256, size=(image_height, image_width, 3)).astype(np.uint8)
+    encoded_jpeg, _ = self._create_encoded_and_decoded_data(
+        image_tensor, 'jpeg')
+    # Randomly generate instance segmentation masks.
+    instance_masks = (
+        np.random.randint(2, size=(num_instances, image_height,
+                                   image_width)).astype(np.float32))
+    instance_masks_flattened = np.reshape(instance_masks, [-1])
+    instance_mask_weights = np.array([1, 1, 0, 1], dtype=np.float32)
+    # Randomly generate class labels for each instance.
+    object_classes = np.random.randint(
+        100, size=(num_instances)).astype(np.int64)
+
+    def graph_fn():
+      example = tf.train.Example(
+          features=tf.train.Features(
+              feature={
+                  'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                  'image/format': dataset_util.bytes_feature(six.b('jpeg')),
+                  'image/height': dataset_util.int64_feature(image_height),
+                  'image/width': dataset_util.int64_feature(image_width),
+                  'image/object/mask': dataset_util.float_list_feature(
+                      instance_masks_flattened),
+                  'image/object/mask/weight':
+                      dataset_util.float_list_feature(instance_mask_weights),
+                  'image/object/class/label':
+                      dataset_util.int64_list_feature(object_classes)
+              })).SerializeToString()
+      example_decoder = tf_example_decoder.TfExampleDecoder(
+          load_instance_masks=True)
+      output = example_decoder.decode(tf.convert_to_tensor(example))
+      self.assertAllEqual(
+          (output[fields.InputDataFields.groundtruth_instance_masks]
+           .get_shape().as_list()), [4, 5, 3])
+      self.assertAllEqual(
+          output[fields.InputDataFields.groundtruth_instance_mask_weights],
+          [1, 1, 0, 1])
+      self.assertAllEqual(
+          (output[fields.InputDataFields.groundtruth_classes]
+           .get_shape().as_list()), [4])
+      return output
+
+    tensor_dict = self.execute_cpu(graph_fn, [])
+    self.assertAllEqual(
+        instance_masks.astype(np.float32),
+        tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
+    self.assertAllEqual(
+        object_classes,
+        tensor_dict[fields.InputDataFields.groundtruth_classes])
+
   def testDecodeImageLabels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg, _ = self._create_encoded_and_decoded_data(
research/object_detection/g3doc/running_on_mobile_tf2.md

@@ -13,17 +13,22 @@ on-device machine learning inference with low latency and a small binary size.
 TensorFlow Lite uses many techniques for this such as quantized kernels that
 allow smaller and faster (fixed-point math) models.

-This document shows how elgible models from the
+This document shows how eligible models from the
 [TF2 Detection zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md)
-can be converted for inference with TFLite.
+can be converted for inference with TFLite. See this Colab tutorial for a
+runnable tutorial that walks you through the steps explained in this document:
+
+<a target="_blank" href="https://colab.research.google.com/github/tensorflow/models/blob/master/research/object_detection/colab_tutorials/convert_odt_model_to_TFLite.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" /> Run in Google Colab</a>

 For an end-to-end Python guide on how to fine-tune an SSD model for mobile
 inference, look at
 [this Colab](../colab_tutorials/eager_few_shot_od_training_tflite.ipynb).

 **NOTE:** TFLite currently only supports **SSD Architectures** (excluding
-EfficientDet) for boxes-based detection. Support for EfficientDet is coming
-soon.
+EfficientDet) for boxes-based detection. Support for EfficientDet is provided
+via the
+[TFLite Model Maker](https://www.tensorflow.org/lite/tutorials/model_maker_object_detection)
+library.

 The output model has the following inputs & outputs:
@@ -87,9 +92,46 @@ converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8,
 converter.representative_dataset = <...>
 ```

+### Step 3: Add Metadata
+
+The model needs to be packed with
+[TFLite Metadata](https://www.tensorflow.org/lite/convert/metadata) to enable
+easy integration into mobile apps using the
+[TFLite Task Library](https://www.tensorflow.org/lite/inference_with_metadata/task_library/object_detector).
+This metadata helps the inference code perform the correct pre & post
+processing as required by the model. Use the following code to create the
+metadata.
+
+```python
+from tflite_support.metadata_writers import object_detector
+from tflite_support.metadata_writers import writer_utils
+
+writer = object_detector.MetadataWriter.create_for_inference(
+    writer_utils.load_file(_TFLITE_MODEL_PATH), input_norm_mean=[0],
+    input_norm_std=[255], label_file_paths=[_TFLITE_LABEL_PATH])
+writer_utils.save_file(writer.populate(), _TFLITE_MODEL_WITH_METADATA_PATH)
+```
+
+See the TFLite Metadata Writer API
+[documentation](https://www.tensorflow.org/lite/convert/metadata_writer_tutorial#object_detectors)
+for more details.
+
 ## Running our model on Android

-To run our TensorFlow Lite model on device, we will use Android Studio to build
+### Integrate the model into your app
+
+You can use the TFLite Task Library's
+[ObjectDetector API](https://www.tensorflow.org/lite/inference_with_metadata/task_library/object_detector)
+to integrate the model into your Android app.
+
+```java
+// Initialization
+ObjectDetectorOptions options = ObjectDetectorOptions.builder().setMaxResults(1).build();
+ObjectDetector objectDetector = ObjectDetector.createFromFileAndOptions(
+    context, modelFile, options);
+
+// Run inference
+List<Detection> results = objectDetector.detect(image);
+```
+
+### Test the model using the TFLite sample app
+
+To test our TensorFlow Lite model on device, we will use Android Studio to build
 and run the TensorFlow Lite detection example with the new model. The example is
 found in the
 [TensorFlow examples repository](https://github.com/tensorflow/examples) under
@@ -102,7 +144,7 @@ that support API >= 21. Additional details are available on the
 Next we need to point the app to our new detect.tflite file and give it the
 names of our new labels. Specifically, we will copy our TensorFlow Lite
-flatbuffer to the app assets directory with the following command:
+model with metadata to the app assets directory with the following command:

 ```shell
 mkdir $TF_EXAMPLES/lite/examples/object_detection/android/app/src/main/assets
@@ -110,9 +152,6 @@ cp /tmp/tflite/detect.tflite \
   $TF_EXAMPLES/lite/examples/object_detection/android/app/src/main/assets
 ```

-You will also need to copy your new labelmap labelmap.txt to the assets
-directory.
-
 We will now edit the gradle build file to use these assets. First, open the
 `build.gradle` file
 `$TF_EXAMPLES/lite/examples/object_detection/android/app/build.gradle`. Comment
@@ -122,23 +161,12 @@ out the model download script to avoid your assets being overwritten:

 ```
 // apply from: 'download_model.gradle'
 ```

-If your model is named `detect.tflite`, and your labels file `labelmap.txt`, the
-example will use them automatically as long as they've been properly copied into
-the base assets directory. If you need to use a custom path or filename, open up
-the
+If your model is named `detect.tflite`, the example will use it automatically
+as long as it has been properly copied into the base assets directory. If you
+need to use a custom path or filename, open up the
 $TF_EXAMPLES/lite/examples/object_detection/android/app/src/main/java/org/tensorflow/demo/DetectorActivity.java
-file in a text editor and find the definition of TF_OD_API_LABELS_FILE. Update
-this path to point to your new label map file: "labels_list.txt". Note that if
-your model is quantized, the flag TF_OD_API_IS_QUANTIZED is set to true, and if
-your model is floating point, the flag TF_OD_API_IS_QUANTIZED is set to false.
-This new section of DetectorActivity.java should now look as follows for a
-quantized model:
-
-```java
-private static final boolean TF_OD_API_IS_QUANTIZED = true;
-private static final String TF_OD_API_MODEL_FILE = "detect.tflite";
-private static final String TF_OD_API_LABELS_FILE = "labels_list.txt";
-```
+file in a text editor and find the definition of TF_OD_API_MODEL_FILE. Update
+this path to point to your new model file.

 Once you've copied the TensorFlow Lite model and edited the gradle build script
 to not use the downloaded assets, you can build and deploy the app using the
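Not part of the doc's changes, but a quick way to sanity-check the converted flatbuffer on a desktop before wiring it into the Android app; the model path is an assumption carried over from the shell commands above.

```python
# Sanity-check sketch: run the converted TFLite model once on dummy input.
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='/tmp/tflite/detect.tflite')
interpreter.allocate_tensors()
inp = interpreter.get_input_details()[0]
dummy = np.zeros(inp['shape'], dtype=inp['dtype'])
interpreter.set_tensor(inp['index'], dummy)
interpreter.invoke()
# The SSD TFLite graph emits boxes, classes, scores and a detection count.
outputs = [interpreter.get_tensor(d['index'])
           for d in interpreter.get_output_details()]
```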
research/object_detection/inputs.py

@@ -479,6 +479,7 @@ def pad_input_data_to_static_shapes(tensor_dict,
       input_fields.groundtruth_instance_masks: [max_num_boxes, height, width],
+      input_fields.groundtruth_instance_mask_weights: [max_num_boxes],
       input_fields.groundtruth_is_crowd: [max_num_boxes],
       input_fields.groundtruth_group_of: [max_num_boxes],
       input_fields.groundtruth_area: [max_num_boxes],
@@ -601,6 +602,8 @@ def augment_input_data(tensor_dict, data_augmentation_options):
   include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
                             in tensor_dict)
+  include_instance_mask_weights = (
+      fields.InputDataFields.groundtruth_instance_mask_weights in tensor_dict)
   include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                        in tensor_dict)
   include_keypoint_visibilities = (
@@ -624,6 +627,7 @@ def augment_input_data(tensor_dict, data_augmentation_options):
           include_label_confidences=include_label_confidences,
           include_multiclass_scores=include_multiclass_scores,
           include_instance_masks=include_instance_masks,
+          include_instance_mask_weights=include_instance_mask_weights,
           include_keypoints=include_keypoints,
           include_keypoint_visibilities=include_keypoint_visibilities,
           include_dense_pose=include_dense_pose,
@@ -652,6 +656,7 @@ def _get_labels_dict(input_dict):
       fields.InputDataFields.groundtruth_keypoint_depths,
       fields.InputDataFields.groundtruth_keypoint_depth_weights,
       fields.InputDataFields.groundtruth_instance_masks,
+      fields.InputDataFields.groundtruth_instance_mask_weights,
      fields.InputDataFields.groundtruth_area,
       fields.InputDataFields.groundtruth_is_crowd,
       fields.InputDataFields.groundtruth_group_of,
@@ -804,6 +809,9 @@ def train_input(train_config, train_input_config,
     labels[fields.InputDataFields.groundtruth_instance_masks] is a
       [batch_size, num_boxes, H, W] float32 tensor containing only binary
       values, which represent instance masks for objects.
+    labels[fields.InputDataFields.groundtruth_instance_mask_weights] is a
+      [batch_size, num_boxes] float32 tensor containing groundtruth weights
+      for each instance mask.
     labels[fields.InputDataFields.groundtruth_keypoints] is a
       [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
       keypoints for each box.
@@ -961,6 +969,9 @@ def eval_input(eval_config, eval_input_config, model_config,
     labels[fields.InputDataFields.groundtruth_instance_masks] is a
       [1, num_boxes, H, W] float32 tensor containing only binary values,
       which represent instance masks for objects.
+    labels[fields.InputDataFields.groundtruth_instance_mask_weights] is a
+      [1, num_boxes] float32 tensor containing groundtruth weights for each
+      instance mask.
     labels[fields.InputDataFields.groundtruth_weights] is a
       [batch_size, num_boxes, num_keypoints] float32 tensor containing
       groundtruth weights for the keypoints.
research/object_detection/inputs_test.py

@@ -795,15 +795,20 @@ class DataAugmentationFnTest(test_case.TestCase):
           fields.InputDataFields.image:
               tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
           fields.InputDataFields.groundtruth_instance_masks:
-              tf.constant(np.zeros([2, 10, 10], np.uint8))
+              tf.constant(np.zeros([2, 10, 10], np.uint8)),
+          fields.InputDataFields.groundtruth_instance_mask_weights:
+              tf.constant([1.0, 0.0], np.float32)
       }
       augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
       return (augmented_tensor_dict[fields.InputDataFields.image],
               augmented_tensor_dict[fields.InputDataFields.
-                                    groundtruth_instance_masks])
-    image, masks = self.execute_cpu(graph_fn, [])
+                                    groundtruth_instance_masks],
+              augmented_tensor_dict[fields.InputDataFields.
+                                    groundtruth_instance_mask_weights])
+    image, masks, mask_weights = self.execute_cpu(graph_fn, [])
     self.assertAllEqual(image.shape, [20, 20, 3])
     self.assertAllEqual(masks.shape, [2, 20, 20])
+    self.assertAllClose(mask_weights, [1.0, 0.0])

   def test_include_keypoints_in_data_augmentation(self):
     data_augmentation_options = [
research/object_detection/meta_architectures/center_net_meta_arch.py

@@ -1668,7 +1668,9 @@ def predicted_embeddings_at_object_centers(embedding_predictions,
 class ObjectDetectionParams(
     collections.namedtuple('ObjectDetectionParams', [
         'localization_loss', 'scale_loss_weight', 'offset_loss_weight',
-        'task_loss_weight'
+        'task_loss_weight', 'scale_head_num_filters',
+        'scale_head_kernel_sizes', 'offset_head_num_filters',
+        'offset_head_kernel_sizes'
     ])):
   """Namedtuple to host object detection related parameters.
@@ -1684,7 +1686,11 @@ class ObjectDetectionParams(
               localization_loss,
               scale_loss_weight,
               offset_loss_weight,
-              task_loss_weight=1.0):
+              task_loss_weight=1.0,
+              scale_head_num_filters=(256),
+              scale_head_kernel_sizes=(3),
+              offset_head_num_filters=(256),
+              offset_head_kernel_sizes=(3)):
     """Constructor with default values for ObjectDetectionParams.

     Args:
@@ -1697,13 +1703,23 @@ class ObjectDetectionParams(
         depending on the input size.
       offset_loss_weight: float, The weight for localizing center offsets.
       task_loss_weight: float, the weight of the object detection loss.
+      scale_head_num_filters: filter numbers of the convolutional layers used
+        by the object detection box scale prediction head.
+      scale_head_kernel_sizes: kernel size of the convolutional layers used
+        by the object detection box scale prediction head.
+      offset_head_num_filters: filter numbers of the convolutional layers used
+        by the object detection box offset prediction head.
+      offset_head_kernel_sizes: kernel size of the convolutional layers used
+        by the object detection box offset prediction head.

     Returns:
       An initialized ObjectDetectionParams namedtuple.
     """
     return super(ObjectDetectionParams,
-                 cls).__new__(cls, localization_loss, scale_loss_weight,
-                              offset_loss_weight, task_loss_weight)
+                 cls).__new__(cls, localization_loss, scale_loss_weight,
+                              offset_loss_weight, task_loss_weight,
+                              scale_head_num_filters, scale_head_kernel_sizes,
+                              offset_head_num_filters,
+                              offset_head_kernel_sizes)


 class KeypointEstimationParams(
@@ -1937,7 +1953,8 @@ class ObjectCenterParams(
 class MaskParams(
     collections.namedtuple('MaskParams', [
         'classification_loss', 'task_loss_weight', 'mask_height', 'mask_width',
-        'score_threshold', 'heatmap_bias_init'
+        'score_threshold', 'heatmap_bias_init', 'mask_head_num_filters',
+        'mask_head_kernel_sizes'
     ])):
   """Namedtuple to store mask prediction related parameters."""
@@ -1949,7 +1966,9 @@ class MaskParams(
               mask_height=256,
               mask_width=256,
               score_threshold=0.5,
-              heatmap_bias_init=-2.19):
+              heatmap_bias_init=-2.19,
+              mask_head_num_filters=(256),
+              mask_head_kernel_sizes=(3)):
     """Constructor with default values for MaskParams.

     Args:
@@ -1963,6 +1982,10 @@ class MaskParams(
       heatmap_bias_init: float, the initial value of bias in the convolutional
         kernel of the semantic segmentation prediction head. If set to None,
         the bias is initialized with zeros.
+      mask_head_num_filters: filter numbers of the convolutional layers used
+        by the mask prediction head.
+      mask_head_kernel_sizes: kernel size of the convolutional layers used
+        by the mask prediction head.

     Returns:
       An initialized MaskParams namedtuple.
@@ -1970,7 +1993,8 @@ class MaskParams(
     return super(MaskParams,
                  cls).__new__(cls, classification_loss, task_loss_weight,
                               mask_height, mask_width,
-                              score_threshold, heatmap_bias_init)
+                              score_threshold, heatmap_bias_init,
+                              mask_head_num_filters, mask_head_kernel_sizes)


 class DensePoseParams(
@@ -2312,10 +2336,18 @@ class CenterNetMetaArch(model.DetectionModel):
     if self._od_params is not None:
       prediction_heads[BOX_SCALE] = self._make_prediction_net_list(
-          num_feature_outputs, NUM_SIZE_CHANNELS, name='box_scale',
+          num_feature_outputs,
+          NUM_SIZE_CHANNELS,
+          kernel_sizes=self._od_params.scale_head_kernel_sizes,
+          num_filters=self._od_params.scale_head_num_filters,
+          name='box_scale',
           unit_height_conv=unit_height_conv)
       prediction_heads[BOX_OFFSET] = self._make_prediction_net_list(
-          num_feature_outputs, NUM_OFFSET_CHANNELS, name='box_offset',
+          num_feature_outputs,
+          NUM_OFFSET_CHANNELS,
+          kernel_sizes=self._od_params.offset_head_kernel_sizes,
+          num_filters=self._od_params.offset_head_num_filters,
+          name='box_offset',
           unit_height_conv=unit_height_conv)
     if self._kp_params_dict is not None:
@@ -2370,6 +2402,8 @@ class CenterNetMetaArch(model.DetectionModel):
       prediction_heads[SEGMENTATION_HEATMAP] = self._make_prediction_net_list(
           num_feature_outputs,
           num_classes,
+          kernel_sizes=self._mask_params.mask_head_kernel_sizes,
+          num_filters=self._mask_params.mask_head_num_filters,
          bias_fill=self._mask_params.heatmap_bias_init,
           name='seg_heatmap',
           unit_height_conv=unit_height_conv)
@@ -2721,8 +2755,7 @@ class CenterNetMetaArch(model.DetectionModel):
         gt_weights_list=gt_weights_list,
         gt_classes_list=gt_classes_list,
         gt_boxes_list=gt_boxes_list)
-    flattened_valid_mask = _flatten_spatial_dimensions(
-        tf.expand_dims(valid_mask_batch, axis=-1))
+    flattened_valid_mask = _flatten_spatial_dimensions(valid_mask_batch)
     flattened_heapmap_targets = _flatten_spatial_dimensions(keypoint_heatmap)
     # Sum over the number of instances per keypoint types to get the total
     # number of keypoints. Note that this is used to normalize the loss and we
@@ -2945,20 +2978,32 @@ class CenterNetMetaArch(model.DetectionModel):
     Returns:
       A float scalar tensor representing the mask loss.
     """
+    gt_boxes_list = self.groundtruth_lists(fields.BoxListFields.boxes)
     gt_masks_list = self.groundtruth_lists(fields.BoxListFields.masks)
+    gt_mask_weights_list = None
+    if self.groundtruth_has_field(fields.BoxListFields.mask_weights):
+      gt_mask_weights_list = self.groundtruth_lists(
+          fields.BoxListFields.mask_weights)
     gt_classes_list = self.groundtruth_lists(fields.BoxListFields.classes)

     # Convert the groundtruth to targets.
     assigner = self._target_assigner_dict[SEGMENTATION_TASK]
-    heatmap_targets = assigner.assign_segmentation_targets(
-        gt_masks_list=gt_masks_list, gt_classes_list=gt_classes_list)
+    heatmap_targets, heatmap_weight = assigner.assign_segmentation_targets(
+        gt_masks_list=gt_masks_list,
+        gt_classes_list=gt_classes_list,
+        gt_boxes_list=gt_boxes_list,
+        gt_mask_weights_list=gt_mask_weights_list)
     flattened_heatmap_targets = _flatten_spatial_dimensions(heatmap_targets)
+    flattened_heatmap_mask = _flatten_spatial_dimensions(
+        heatmap_weight[:, :, :, tf.newaxis])
+    per_pixel_weights *= flattened_heatmap_mask

     loss = 0.0
     mask_loss_fn = self._mask_params.classification_loss
-    total_pixels_in_loss = tf.reduce_sum(per_pixel_weights)
+    total_pixels_in_loss = tf.math.maximum(
+        tf.reduce_sum(per_pixel_weights), 1)

     # Loop through each feature output head.
     for pred in segmentation_predictions:
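A sketch of constructing MaskParams with the new head-configuration fields; the values mirror the updated test helper get_fake_mask_params shown below, and the loss choice is illustrative.

```python
from object_detection.core import losses
from object_detection.meta_architectures import center_net_meta_arch

# Per-head filter counts and kernel sizes are now configurable.
mask_params = center_net_meta_arch.MaskParams(
    classification_loss=losses.WeightedSoftmaxClassificationLoss(),
    task_loss_weight=1.0,
    mask_height=4,
    mask_width=4,
    mask_head_num_filters=[96],
    mask_head_kernel_sizes=[3])
```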
research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py

@@ -1539,7 +1539,9 @@ def get_fake_mask_params():
       classification_loss=losses.WeightedSoftmaxClassificationLoss(),
       task_loss_weight=1.0,
       mask_height=4,
-      mask_width=4)
+      mask_width=4,
+      mask_head_num_filters=[96],
+      mask_head_kernel_sizes=[3])


 def get_fake_densepose_params():
research/object_detection/model_lib.py

@@ -266,6 +266,7 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
       # dimension. This list has to be kept in sync with InputDataFields in
       # standard_fields.py.
       fields.InputDataFields.groundtruth_instance_masks,
+      fields.InputDataFields.groundtruth_instance_mask_weights,
       fields.InputDataFields.groundtruth_classes,
       fields.InputDataFields.groundtruth_boxes,
       fields.InputDataFields.groundtruth_keypoints,
@@ -319,6 +320,10 @@ def provide_groundtruth(model, labels):
   if fields.InputDataFields.groundtruth_instance_masks in labels:
     gt_masks_list = labels[fields.InputDataFields.groundtruth_instance_masks]
+  gt_mask_weights_list = None
+  if fields.InputDataFields.groundtruth_instance_mask_weights in labels:
+    gt_mask_weights_list = labels[
+        fields.InputDataFields.groundtruth_instance_mask_weights]
   gt_keypoints_list = None
   if fields.InputDataFields.groundtruth_keypoints in labels:
     gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
@@ -383,6 +388,7 @@ def provide_groundtruth(model, labels):
       groundtruth_confidences_list=gt_confidences_list,
       groundtruth_labeled_classes=gt_labeled_classes,
       groundtruth_masks_list=gt_masks_list,
+      groundtruth_mask_weights_list=gt_mask_weights_list,
       groundtruth_keypoints_list=gt_keypoints_list,
       groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
       groundtruth_dp_num_points_list=gt_dp_num_points_list,
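A sketch of the new plumbing (not from the commit; tensors and the prebuilt `detection_model` are illustrative assumptions): when the unstacked labels dict carries mask weights, provide_groundtruth forwards them to the model.

```python
import tensorflow.compat.v1 as tf
from object_detection import model_lib
from object_detection.core import standard_fields as fields

# Per-image lists, as produced by unstack_batch for a batch of one.
labels = {
    fields.InputDataFields.groundtruth_boxes:
        [tf.constant([[0., 0., 1., 1.]], tf.float32)],
    fields.InputDataFields.groundtruth_classes:
        [tf.constant([[0., 1.]], tf.float32)],
    fields.InputDataFields.groundtruth_weights:
        [tf.constant([1.0], tf.float32)],
    fields.InputDataFields.groundtruth_instance_masks:
        [tf.zeros([1, 32, 32], tf.float32)],
    # New: per-instance mask weights, forwarded as
    # groundtruth_mask_weights_list.
    fields.InputDataFields.groundtruth_instance_mask_weights:
        [tf.constant([1.0], tf.float32)],
}
# `detection_model` is an already-built DetectionModel instance (assumed).
model_lib.provide_groundtruth(detection_model, labels)
```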
research/object_detection/model_lib_v2.py
View file @
a04d9e0e
...
...
@@ -20,11 +20,11 @@ from __future__ import print_function
import
copy
import
os
import
pprint
import
time
import
numpy
as
np
import
numpy
as
np
import
tensorflow.compat.v1
as
tf
import
tensorflow.compat.v2
as
tf2
from
object_detection
import
eval_util
from
object_detection
import
inputs
...
...
@@ -87,6 +87,8 @@ def _compute_losses_and_predictions_dicts(
labels[fields.InputDataFields.groundtruth_instance_masks] is a
float32 tensor containing only binary values, which represent
instance masks for objects.
labels[fields.InputDataFields.groundtruth_instance_mask_weights] is a
float32 tensor containing weights for the instance masks.
labels[fields.InputDataFields.groundtruth_keypoints] is a
float32 tensor containing keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is an int32
...
...
@@ -181,6 +183,22 @@ def _ensure_model_is_built(model, input_dataset, unpad_groundtruth_tensors):
))
def
normalize_dict
(
values_dict
,
num_replicas
):
num_replicas
=
tf
.
constant
(
num_replicas
,
dtype
=
tf
.
float32
)
return
{
key
:
tf
.
math
.
divide
(
loss
,
num_replicas
)
for
key
,
loss
in
values_dict
.
items
()}
def
reduce_dict
(
strategy
,
reduction_dict
,
reduction_op
):
# TODO(anjalisridhar): explore if it is safe to remove the # num_replicas
# scaling of the loss and switch this to a ReduceOp.Mean
return
{
name
:
strategy
.
reduce
(
reduction_op
,
loss
,
axis
=
None
)
for
name
,
loss
in
reduction_dict
.
items
()
}
# TODO(kaftan): Explore removing learning_rate from this method & returning
## The full losses dict instead of just total_loss, then doing all summaries
## saving in a utility method called by the outer training loop.
...
...
@@ -190,10 +208,8 @@ def eager_train_step(detection_model,
labels
,
unpad_groundtruth_tensors
,
optimizer
,
learning_rate
,
add_regularization_loss
=
True
,
clip_gradients_value
=
None
,
global_step
=
None
,
num_replicas
=
1.0
):
"""Process a single training batch.
...
...
@@ -237,6 +253,9 @@ def eager_train_step(detection_model,
labels[fields.InputDataFields.groundtruth_instance_masks] is a
[batch_size, num_boxes, H, W] float32 tensor containing only binary
values, which represent instance masks for objects.
labels[fields.InputDataFields.groundtruth_instance_mask_weights] is a
[batch_size, num_boxes] float32 tensor containing weights for the
instance masks.
labels[fields.InputDataFields.groundtruth_keypoints] is a
[batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
keypoints for each box.
...
...
@@ -261,16 +280,10 @@ def eager_train_step(detection_model,
float32 tensor containing the weights of the keypoint depth feature.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
optimizer: The training optimizer that will update the variables.
learning_rate: The learning rate tensor for the current training step.
This is used only for TensorBoard logging purposes, it does not affect
model training.
add_regularization_loss: Whether or not to include the model's
regularization loss in the losses dictionary.
clip_gradients_value: If this is present, clip the gradients global norm
at this value using `tf.clip_by_global_norm`.
global_step: The current training step. Used for TensorBoard logging
purposes. This step is not updated by this function and must be
incremented separately.
num_replicas: The number of replicas in the current distribution strategy.
This is used to scale the total loss so that training in a distribution
strategy works correctly.
...
...
@@ -291,31 +304,18 @@ def eager_train_step(detection_model,
losses_dict
,
_
=
_compute_losses_and_predictions_dicts
(
detection_model
,
features
,
labels
,
add_regularization_loss
)
total_loss
=
losses_dict
[
'Loss/total_loss'
]
# Normalize loss for num replicas
total_loss
=
tf
.
math
.
divide
(
total_loss
,
tf
.
constant
(
num_replicas
,
dtype
=
tf
.
float32
))
losses_dict
[
'Loss/normalized_total_loss'
]
=
total_loss
for
loss_type
in
losses_dict
:
tf
.
compat
.
v2
.
summary
.
scalar
(
loss_type
,
losses_dict
[
loss_type
],
step
=
global_step
)
losses_dict
=
normalize_dict
(
losses_dict
,
num_replicas
)
trainable_variables
=
detection_model
.
trainable_variables
total_loss
=
losses_dict
[
'Loss/total_loss'
]
gradients
=
tape
.
gradient
(
total_loss
,
trainable_variables
)
if
clip_gradients_value
:
gradients
,
_
=
tf
.
clip_by_global_norm
(
gradients
,
clip_gradients_value
)
optimizer
.
apply_gradients
(
zip
(
gradients
,
trainable_variables
))
tf
.
compat
.
v2
.
summary
.
scalar
(
'learning_rate'
,
learning_rate
,
step
=
global_step
)
tf
.
compat
.
v2
.
summary
.
image
(
name
=
'train_input_images'
,
step
=
global_step
,
data
=
features
[
fields
.
InputDataFields
.
image
],
max_outputs
=
3
)
return
total_loss
return
losses_dict
def
validate_tf_v2_checkpoint_restore_map
(
checkpoint_restore_map
):
...
...
@@ -397,7 +397,8 @@ def load_fine_tune_checkpoint(model, checkpoint_path, checkpoint_type,
fine_tune_checkpoint_type
=
checkpoint_type
)
validate_tf_v2_checkpoint_restore_map
(
restore_from_objects_dict
)
ckpt
=
tf
.
train
.
Checkpoint
(
**
restore_from_objects_dict
)
ckpt
.
restore
(
checkpoint_path
).
assert_existing_objects_matched
()
ckpt
.
restore
(
checkpoint_path
).
expect_partial
().
assert_existing_objects_matched
()
def
get_filepath
(
strategy
,
filepath
):
...
...
@@ -474,7 +475,12 @@ def train_loop(
Checkpoint every n training steps.
checkpoint_max_to_keep:
int, the number of most recent checkpoints to keep in the model directory.
record_summaries: Boolean, whether or not to record summaries.
record_summaries: Boolean, whether or not to record summaries defined by
the model or the training pipeline. This does not impact the summaries
of the loss values which are always recorded. Examples of summaries
that are controlled by this flag include:
- Image summaries of training images.
- Intermediate tensors which maybe logged by meta architectures.
performance_summary_exporter: function for exporting performance metrics.
num_steps_per_iteration: int, The number of training steps to perform
in each iteration.
...
...
@@ -533,7 +539,8 @@ def train_loop(
strategy
=
tf
.
compat
.
v2
.
distribute
.
get_strategy
()
with
strategy
.
scope
():
detection_model
=
MODEL_BUILD_UTIL_MAP
[
'detection_model_fn_base'
](
model_config
=
model_config
,
is_training
=
True
)
model_config
=
model_config
,
is_training
=
True
,
add_summaries
=
record_summaries
)
def
train_dataset_fn
(
input_context
):
"""Callable to create train input."""
...
...
@@ -576,11 +583,9 @@ def train_loop(
# is the chief.
summary_writer_filepath
=
get_filepath
(
strategy
,
os
.
path
.
join
(
model_dir
,
'train'
))
if
record_summaries
:
summary_writer
=
tf
.
compat
.
v2
.
summary
.
create_file_writer
(
summary_writer_filepath
)
else
:
summary_writer
=
tf2
.
summary
.
create_noop_writer
()
summary_writer
=
tf
.
compat
.
v2
.
summary
.
create_file_writer
(
summary_writer_filepath
)
with
summary_writer
.
as_default
():
with
strategy
.
scope
():
...
...
@@ -614,32 +619,37 @@ def train_loop(
       def train_step_fn(features, labels):
         """Single train step."""
-        loss = eager_train_step(
+        if record_summaries:
+          tf.compat.v2.summary.image(
+              name='train_input_images',
+              step=global_step,
+              data=features[fields.InputDataFields.image],
+              max_outputs=3)
+        losses_dict = eager_train_step(
             detection_model,
             features,
             labels,
             unpad_groundtruth_tensors,
             optimizer,
             learning_rate=learning_rate_fn(),
             add_regularization_loss=add_regularization_loss,
             clip_gradients_value=clip_gradients_value,
             global_step=global_step,
             num_replicas=strategy.num_replicas_in_sync)
         global_step.assign_add(1)
-        return loss
+        return losses_dict

       def _sample_and_train(strategy, train_step_fn, data_iterator):
         features, labels = data_iterator.next()
         if hasattr(tf.distribute.Strategy, 'run'):
-          per_replica_losses = strategy.run(
+          per_replica_losses_dict = strategy.run(
               train_step_fn, args=(features, labels))
         else:
-          per_replica_losses = strategy.experimental_run_v2(
-              train_step_fn, args=(features, labels))
-        # TODO(anjalisridhar): explore if it is safe to remove the
-        ## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
-        return strategy.reduce(tf.distribute.ReduceOp.SUM,
-                               per_replica_losses, axis=None)
+          per_replica_losses_dict = (
+              strategy.experimental_run_v2(
+                  train_step_fn, args=(features, labels)))
+        return reduce_dict(
+            strategy, per_replica_losses_dict, tf.distribute.ReduceOp.SUM)

       @tf.function
       def _dist_train_step(data_iterator):
...
...
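reduce_dict itself is not shown in this diff; a sketch consistent with how it is called here would apply the cross-replica reduction to every entry of the per-replica dict while preserving its keys:

def reduce_dict(strategy, reduction_dict, reduction_op):
  # Reduce each per-replica tensor (e.g. with ReduceOp.SUM) across replicas.
  return {
      name: strategy.reduce(reduction_op, value, axis=None)
      for name, value in reduction_dict.items()
  }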
@@ -665,7 +675,7 @@ def train_loop(
         for _ in range(global_step.value(), train_steps,
                        num_steps_per_iteration):
-          loss = _dist_train_step(train_input_iter)
+          losses_dict = _dist_train_step(train_input_iter)

           time_taken = time.time() - last_step_time
           last_step_time = time.time()
...
...
@@ -676,11 +686,19 @@ def train_loop(
           steps_per_sec_list.append(steps_per_sec)

+          logged_dict = losses_dict.copy()
+          logged_dict['learning_rate'] = learning_rate_fn()
+
+          for key, val in logged_dict.items():
+            tf.compat.v2.summary.scalar(key, val, step=global_step)
+
           if global_step.value() - logged_step >= 100:
+            logged_dict_np = {name: value.numpy() for name, value in
+                              logged_dict.items()}
             tf.logging.info(
-                'Step {} per-step time {:.3f}s loss={:.3f}'.format(
-                    global_step.value(), time_taken / num_steps_per_iteration,
-                    loss))
+                'Step {} per-step time {:.3f}s'.format(
+                    global_step.value(), time_taken / num_steps_per_iteration))
+            tf.logging.info(pprint.pformat(logged_dict_np, width=40))
             logged_step = global_step.value()

           if ((int(global_step.value()) - checkpointed_step) >=
...
...
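The per-step loss scalar in the log line is replaced by dumping the whole dict every 100 steps via pprint (the pprint import is assumed at module top and not shown in this hunk). For a dict like the one assembled above, width=40 yields one entry per line; key names other than 'Loss/total_loss' and 'learning_rate' are illustrative:

import pprint

logged_dict_np = {'Loss/classification_loss': 0.36,
                  'Loss/total_loss': 0.57,
                  'learning_rate': 0.004}
print(pprint.pformat(logged_dict_np, width=40))
# {'Loss/classification_loss': 0.36,
#  'Loss/total_loss': 0.57,
#  'learning_rate': 0.004}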
@@ -699,7 +717,7 @@ def train_loop(
           'steps_per_sec': np.mean(steps_per_sec_list),
           'steps_per_sec_p50': np.median(steps_per_sec_list),
           'steps_per_sec_max': max(steps_per_sec_list),
-          'last_batch_loss': float(loss)
+          'last_batch_loss': float(losses_dict['Loss/total_loss'])
       }
       mixed_precision = 'bf16' if kwargs['use_bfloat16'] else 'fp32'
       performance_summary_exporter(metrics, mixed_precision)
...
...
research/object_detection/model_main_tf2.py
View file @ a04d9e0e
...
...
@@ -65,8 +65,10 @@ flags.DEFINE_integer(
 flags.DEFINE_integer('checkpoint_every_n', 1000,
                      'Integer defining how often we checkpoint.')
 flags.DEFINE_boolean('record_summaries', True,
-                     ('Whether or not to record summaries during'
-                      ' training.'))
+                     ('Whether or not to record summaries defined by the model'
+                      ' or the training pipeline. This does not impact the'
+                      ' summaries of the loss values which are always'
+                      ' recorded.'))

 FLAGS = flags.FLAGS
...
...
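For reference, a sketch of how the flag value reaches the training loop; the argument wiring follows the train_loop signature documented above, though the exact call site in main() may differ:

from object_detection import model_lib_v2

model_lib_v2.train_loop(
    pipeline_config_path=FLAGS.pipeline_config_path,
    model_dir=FLAGS.model_dir,
    checkpoint_every_n=FLAGS.checkpoint_every_n,
    record_summaries=FLAGS.record_summaries)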
research/object_detection/models/keras_models/resnet_v1.py
View file @ a04d9e0e
...
...
@@ -19,9 +19,10 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+from keras.applications import resnet
 import tensorflow.compat.v1 as tf
-from tensorflow.python.keras.applications import resnet

 from object_detection.core import freezable_batch_norm
 from object_detection.models.keras_models import model_utils
...
...
research/object_detection/protos/center_net.proto
View file @ a04d9e0e
...
...
@@ -65,6 +65,14 @@ message CenterNet {
     // Localization loss configuration for object scale and offset losses.
     optional LocalizationLoss localization_loss = 8;

+    // Parameters to determine the architecture of the object scale prediction
+    // head.
+    optional PredictionHeadParams scale_head_params = 9;
+
+    // Parameters to determine the architecture of the object offset prediction
+    // head.
+    optional PredictionHeadParams offset_head_params = 10;
   }
   optional ObjectDetection object_detection_task = 4;
...
...
@@ -268,6 +276,10 @@ message CenterNet {
     // prediction head. -2.19 corresponds to predicting foreground with
     // a probability of 0.1.
     optional float heatmap_bias_init = 3 [default = -2.19];

+    // Parameters to determine the architecture of the segmentation mask
+    // prediction head.
+    optional PredictionHeadParams mask_head_params = 7;
   }
   optional MaskEstimation mask_estimation_task = 8;
...
...
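A sketch of how the new fields might appear in a pipeline config. The inner fields of PredictionHeadParams (num_filters, kernel_sizes) are assumptions based on similar head messages and are not shown in this diff:

model {
  center_net {
    object_detection_task {
      # Hypothetical contents; PredictionHeadParams fields are assumed.
      scale_head_params {
        num_filters: 128
        kernel_sizes: 3
      }
      offset_head_params {
        num_filters: 128
        kernel_sizes: 3
      }
    }
    mask_estimation_task {
      mask_head_params {
        num_filters: 128
        kernel_sizes: 3
      }
    }
  }
}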
research/object_detection/utils/spatial_transform_ops.py
View file @ a04d9e0e
...
...
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function

 import tensorflow.compat.v1 as tf
+from object_detection.utils import shape_utils

 def _coordinate_vector_1d(start, end, size, align_endpoints):
...
...
@@ -322,7 +323,7 @@ def multilevel_roi_align(features, boxes, box_levels, output_size,
   """
   with tf.name_scope(scope, 'MultiLevelRoIAlign'):
     features, true_feature_shapes = pad_to_max_size(features)
-    batch_size = tf.shape(features)[0]
+    batch_size = shape_utils.combined_static_and_dynamic_shape(features)[0]
     num_levels = features.get_shape().as_list()[1]
     max_feature_height = tf.shape(features)[2]
     max_feature_width = tf.shape(features)[3]
...
...
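The switch away from tf.shape(features)[0] matters because combined_static_and_dynamic_shape returns plain Python ints for statically known dimensions and falls back to tensors only for unknown ones, letting downstream ops treat the batch size as a constant where possible. A small sketch (TF1-style placeholder used to get a partially known shape):

import tensorflow.compat.v1 as tf
from object_detection.utils import shape_utils

tf.disable_eager_execution()
x = tf.placeholder(tf.float32, shape=[8, None, 32])

shape = shape_utils.combined_static_and_dynamic_shape(x)
# shape == [8, <int32 Tensor>, 32]: index 0 is the plain int 8, whereas
# tf.shape(x)[0] would be a Tensor even though the batch size is known.
batch_size = shape[0]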
research/object_detection/utils/target_assigner_utils.py
View file @ a04d9e0e
...
...
@@ -289,12 +289,38 @@ def get_valid_keypoint_mask_for_class(keypoint_coordinates,
   return mask, keypoints_nan_to_zeros

-def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout):
-  """Blackout the pixel weights in the target box regions.
+def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
+                                          weights=None):
+  """Apply weights at pixel locations.

   This function is used to generate the pixel weight mask (usually in the
   output image dimension). The mask is used to ignore some regions when
   computing the loss.

+  Weights are applied as follows:
+    - Any region outside of a box gets the default weight 1.0.
+    - Any box for which an explicit weight is specified gets that weight. If
+      multiple boxes overlap, the maximum of the weights is applied.
+    - Any box for which blackout=True is specified will get a weight of 0.0,
+      regardless of whether an equivalent non-zero weight is specified. Also,
+      the blackout region takes precedence over other boxes which may overlap
+      with non-zero weight.
+
+  Example:
+    height = 4
+    width = 4
+    boxes = [[0., 0., 2., 2.],
+             [0., 0., 4., 2.],
+             [3., 0., 4., 4.]]
+    blackout = [False, False, True]
+    weights = [4.0, 3.0, 2.0]
+    blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
+                                          weights)
+    >> [[4.0, 4.0, 1.0, 1.0],
+        [4.0, 4.0, 1.0, 1.0],
+        [3.0, 3.0, 1.0, 1.0],
+        [0.0, 0.0, 0.0, 0.0]]
+
   Args:
     height: int, height of the (output) image.
     width: int, width of the (output) image.
...
...
@@ -302,10 +328,15 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout):
       coordinates of the four corners of the boxes.
     blackout: A boolean tensor with shape [num_instances] indicating whether to
       blackout (zero-out) the weights within the box regions.
+    weights: An optional float32 tensor with shape [num_instances] indicating
+      a value to apply in each box region. Note that if blackout=True for a
+      given box, the weight will be zero. If None, all weights are assumed to
+      be 1.

   Returns:
     A float tensor with shape [height, width] where all values within the
-    regions of the blackout boxes are 0.0 and 1.0 else where.
+    regions of the blackout boxes are 0.0 and 1.0 (or weights if supplied)
+    elsewhere.
   """
   num_instances, _ = shape_utils.combined_static_and_dynamic_shape(boxes)
   # If no annotation instance is provided, return all ones (instead of
...
...
@@ -323,22 +354,36 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout):
   # Make the mask with all 1.0 in the box regions.
   # Shape: [num_instances, height, width]
-  in_boxes = tf.cast(
-      tf.logical_and(
-          tf.logical_and(y_grid >= y_min, y_grid <= y_max),
-          tf.logical_and(x_grid >= x_min, x_grid <= x_max)),
-      dtype=tf.float32)
-  # Shape: [num_instances, height, width]
-  blackout = tf.tile(
-      tf.expand_dims(tf.expand_dims(blackout, axis=-1), axis=-1),
-      [1, height, width])
-  # Select only the boxes specified by blackout.
-  selected_in_boxes = tf.where(blackout, in_boxes, tf.zeros_like(in_boxes))
-  out_boxes = tf.reduce_max(selected_in_boxes, axis=0)
-  out_boxes = tf.ones_like(out_boxes) - out_boxes
-  return out_boxes
+  in_boxes = tf.math.logical_and(
+      tf.math.logical_and(y_grid >= y_min, y_grid < y_max),
+      tf.math.logical_and(x_grid >= x_min, x_grid < x_max))
+  if weights is None:
+    weights = tf.ones_like(blackout, dtype=tf.float32)
+
+  # Compute a [height, width] tensor with the maximum weight in each box, and
+  # 0.0 elsewhere.
+  weights_tiled = tf.tile(
+      weights[:, tf.newaxis, tf.newaxis], [1, height, width])
+  weights_3d = tf.where(in_boxes, weights_tiled,
+                        tf.zeros_like(weights_tiled))
+  weights_2d = tf.math.maximum(
+      tf.math.reduce_max(weights_3d, axis=0), 0.0)
+
+  # Add 1.0 to all regions outside a box.
+  weights_2d = tf.where(
+      tf.math.reduce_any(in_boxes, axis=0),
+      weights_2d,
+      tf.ones_like(weights_2d))
+
+  # Now enforce that blackout regions all have zero weights.
+  keep_region = tf.cast(tf.math.logical_not(blackout), tf.float32)
+  keep_region_tiled = tf.tile(
+      keep_region[:, tf.newaxis, tf.newaxis], [1, height, width])
+  keep_region_3d = tf.where(in_boxes, keep_region_tiled,
+                            tf.ones_like(keep_region_tiled))
+  keep_region_2d = tf.math.reduce_min(keep_region_3d, axis=0)
+  return weights_2d * keep_region_2d

 def _get_yx_indices_offset_by_radius(radius):
...
...
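To make the precedence rules concrete outside TensorFlow, here is a minimal NumPy restatement of the function's contract (integer box coordinates, matching the half-open y_grid < y_max / x_grid < x_max test above; an illustration, not the shipped implementation):

import numpy as np

def blackout_weights_np(height, width, boxes, blackout, weights):
  """NumPy sketch: max weight inside boxes, 1.0 outside, blackout forces 0."""
  out = np.ones((height, width), np.float32)
  covered = np.zeros((height, width), dtype=bool)
  box_max = np.zeros((height, width), np.float32)
  keep = np.ones((height, width), dtype=bool)
  for (ymin, xmin, ymax, xmax), is_blackout, w in zip(boxes, blackout, weights):
    ys, xs = slice(int(ymin), int(ymax)), slice(int(xmin), int(xmax))
    covered[ys, xs] = True
    box_max[ys, xs] = np.maximum(box_max[ys, xs], w)
    if is_blackout:
      keep[ys, xs] = False
  out[covered] = box_max[covered]
  return out * keep

# Reproduces the docstring example from the diff above:
print(blackout_weights_np(
    4, 4,
    [[0., 0., 2., 2.], [0., 0., 4., 2.], [3., 0., 4., 4.]],
    [False, False, True],
    [4.0, 3.0, 2.0]))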
research/object_detection/utils/target_assigner_utils_test.py
View file @ a04d9e0e
...
...
@@ -196,13 +196,36 @@ class TargetUtilTest(parameterized.TestCase, test_case.TestCase):
       return output

     output = self.execute(graph_fn, [])
-    # All zeros in region [0:6, 0:6].
-    self.assertAlmostEqual(np.sum(output[0:6, 0:6]), 0.0)
-    # All zeros in region [12:19, 6:9].
-    self.assertAlmostEqual(np.sum(output[6:9, 12:19]), 0.0)
+    # All zeros in region [0:5, 0:5].
+    self.assertAlmostEqual(np.sum(output[0:5, 0:5]), 0.0)
+    # All zeros in region [12:18, 6:8].
+    self.assertAlmostEqual(np.sum(output[6:8, 12:18]), 0.0)
     # All other pixel weights should be 1.0.
-    # 20 * 10 - 6 * 6 - 3 * 7 = 143.0
-    self.assertAlmostEqual(np.sum(output), 143.0)
+    # 20 * 10 - 5 * 5 - 2 * 6 = 163.0
+    self.assertAlmostEqual(np.sum(output), 163.0)

+  def test_blackout_pixel_weights_by_box_regions_with_weights(self):
+
+    def graph_fn():
+      boxes = tf.constant(
+          [[0.0, 0.0, 2.0, 2.0],
+           [0.0, 0.0, 4.0, 2.0],
+           [3.0, 0.0, 4.0, 4.0]],
+          dtype=tf.float32)
+      blackout = tf.constant([False, False, True], dtype=tf.bool)
+      weights = tf.constant([0.4, 0.3, 0.2], tf.float32)
+      blackout_pixel_weights_by_box_regions = tf.function(
+          ta_utils.blackout_pixel_weights_by_box_regions)
+      output = blackout_pixel_weights_by_box_regions(
+          4, 4, boxes, blackout, weights)
+      return output
+
+    output = self.execute(graph_fn, [])
+    expected_weights = [[0.4, 0.4, 1.0, 1.0],
+                        [0.4, 0.4, 1.0, 1.0],
+                        [0.3, 0.3, 1.0, 1.0],
+                        [0.0, 0.0, 0.0, 0.0]]
+    np.testing.assert_array_almost_equal(expected_weights, output)

   def test_blackout_pixel_weights_by_box_regions_zero_instance(self):

     def graph_fn():
...
...