ModelZoo / ResNet50_tensorflow · Commit 31ca3b97
Authored Jul 23, 2020 by Kaushik Shivakumar
Commit message: "resolve merge conflicts"
Parents: 3e9d886d, 7fcd7cba

Changes: 392. Showing 20 changed files with 1677 additions and 315 deletions (+1677, -315).
research/object_detection/legacy/trainer_tf1_test.py  +3 -0
research/object_detection/meta_architectures/center_net_meta_arch.py  +493 -59
research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py  +219 -11
research/object_detection/meta_architectures/context_rcnn_meta_arch.py  +1 -1
research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py  +5 -4
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py  +51 -29
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py  +4 -3
research/object_detection/meta_architectures/ssd_meta_arch.py  +46 -58
research/object_detection/metrics/coco_evaluation.py  +6 -14
research/object_detection/metrics/coco_evaluation_test.py  +7 -5
research/object_detection/metrics/oid_challenge_evaluation_utils.py  +7 -7
research/object_detection/metrics/oid_vrd_challenge_evaluation_utils.py  +12 -12
research/object_detection/model_lib.py  +52 -11
research/object_detection/model_lib_tf2_test.py  +4 -3
research/object_detection/model_lib_v2.py  +101 -65
research/object_detection/model_main.py  +0 -6
research/object_detection/model_main_tf2.py  +10 -9
research/object_detection/model_tpu_main.py  +3 -18
research/object_detection/models/bidirectional_feature_pyramid_generators.py  +486 -0
research/object_detection/models/bidirectional_feature_pyramid_generators_tf2_test.py  +167 -0
research/object_detection/legacy/trainer_tf1_test.py

@@ -185,6 +185,9 @@ class FakeDetectionModel(model.DetectionModel):
    """
    return {var.op.name: var for var in tf.global_variables()}

+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    pass
+
  def updates(self):
    """Returns a list of update operators for this model.
research/object_detection/meta_architectures/center_net_meta_arch.py
@@ -924,13 +924,16 @@ def convert_strided_predictions_to_normalized_keypoints(

 def convert_strided_predictions_to_instance_masks(
-    boxes, classes, masks, stride, mask_height, mask_width,
-    true_image_shapes, score_threshold=0.5):
+    boxes, classes, masks, true_image_shapes,
+    densepose_part_heatmap=None, densepose_surface_coords=None, stride=4,
+    mask_height=256, mask_width=256, score_threshold=0.5,
+    densepose_class_index=-1):
   """Converts predicted full-image masks into instance masks.

   For each predicted detection box:
-  * Crop and resize the predicted mask based on the detected bounding box
-    coordinates and class prediction. Uses bilinear resampling.
+  * Crop and resize the predicted mask (and optionally DensePose coordinates)
+    based on the detected bounding box coordinates and class prediction. Uses
+    bilinear resampling.
   * Binarize the mask using the provided score threshold.

   Args:
@@ -940,57 +943,212 @@ def convert_strided_predictions_to_instance_masks(
       detected class for each box (0-indexed).
     masks: A [batch, output_height, output_width, num_classes] float32
       tensor with class probabilities.
+    true_image_shapes: A tensor of shape [batch, 3] representing the true
+      shape of the inputs not considering padding.
+    densepose_part_heatmap: (Optional) A [batch, output_height, output_width,
+      num_parts] float32 tensor with part scores (i.e. logits).
+    densepose_surface_coords: (Optional) A [batch, output_height, output_width,
+      2 * num_parts] float32 tensor with predicted part coordinates (in
+      vu-format).
     stride: The stride in the output space.
     mask_height: The desired resized height for instance masks.
     mask_width: The desired resized width for instance masks.
-    true_image_shapes: A tensor of shape [batch, 3] representing the true
-      shape of the inputs not considering padding.
     score_threshold: The threshold at which to convert predicted mask
      into foreground pixels.
+    densepose_class_index: The class index (0-indexed) corresponding to the
+      class which has DensePose labels (e.g. person class).

   Returns:
-    A [batch_size, max_detections, mask_height, mask_width] uint8 tensor with
-    predicted foreground mask for each instance. The masks take values in
-    {0, 1}.
+    A tuple of masks and surface_coords.
+    instance_masks: A [batch_size, max_detections, mask_height, mask_width]
+      uint8 tensor with predicted foreground mask for each instance. If
+      DensePose tensors are provided, then each pixel value in the mask
+      encodes the 1-indexed part.
+    surface_coords: A [batch_size, max_detections, mask_height, mask_width, 2]
+      float32 tensor with (v, u) coordinates. Note that v, u coordinates are
+      only defined on instance masks, and the coordinates at each location of
+      the foreground mask correspond to coordinates on a local part coordinate
+      system (the specific part can be inferred from the `instance_masks`
+      output. If DensePose feature maps are not passed to this function, this
+      output will be None.
+
+  Raises:
+    ValueError: If one but not both of `densepose_part_heatmap` and
+      `densepose_surface_coords` is provided.
   """
-  _, output_height, output_width, _ = (
+  batch_size, output_height, output_width, _ = (
       shape_utils.combined_static_and_dynamic_shape(masks))
   input_height = stride * output_height
   input_width = stride * output_width

+  true_heights, true_widths, _ = tf.unstack(true_image_shapes, axis=1)
+  # If necessary, create dummy DensePose tensors to simplify the map function.
+  densepose_present = True
+  if ((densepose_part_heatmap is not None) ^
+      (densepose_surface_coords is not None)):
+    raise ValueError('To use DensePose, both `densepose_part_heatmap` and '
+                     '`densepose_surface_coords` must be provided')
+  if densepose_part_heatmap is None and densepose_surface_coords is None:
+    densepose_present = False
+    densepose_part_heatmap = tf.zeros(
+        (batch_size, output_height, output_width, 1), dtype=tf.float32)
+    densepose_surface_coords = tf.zeros(
+        (batch_size, output_height, output_width, 2), dtype=tf.float32)
+  crop_and_threshold_fn = functools.partial(
+      crop_and_threshold_masks, input_height=input_height,
+      input_width=input_width, mask_height=mask_height,
+      mask_width=mask_width, score_threshold=score_threshold,
+      densepose_class_index=densepose_class_index)
+
+  instance_masks, surface_coords = shape_utils.static_or_dynamic_map_fn(
+      crop_and_threshold_fn,
+      elems=[boxes, classes, masks, densepose_part_heatmap,
+             densepose_surface_coords, true_heights, true_widths],
+      dtype=[tf.uint8, tf.float32],
+      back_prop=False)
+  surface_coords = surface_coords if densepose_present else None
+  return instance_masks, surface_coords
+
+
+def crop_and_threshold_masks(elems, input_height, input_width, mask_height=256,
+                             mask_width=256, score_threshold=0.5,
+                             densepose_class_index=-1):
+  """Crops and thresholds masks based on detection boxes.
+
+  Args:
+    elems: A tuple of
+      boxes - float32 tensor of shape [max_detections, 4]
+      classes - int32 tensor of shape [max_detections] (0-indexed)
+      masks - float32 tensor of shape [output_height, output_width, num_classes]
+      part_heatmap - float32 tensor of shape [output_height, output_width,
+        num_parts]
+      surf_coords - float32 tensor of shape [output_height, output_width,
+        2 * num_parts]
+      true_height - scalar int tensor
+      true_width - scalar int tensor
+    input_height: Input height to network.
+    input_width: Input width to network.
+    mask_height: Height for resizing mask crops.
+    mask_width: Width for resizing mask crops.
+    score_threshold: The threshold at which to convert predicted mask
+      into foreground pixels.
+    densepose_class_index: scalar int tensor with the class index (0-indexed)
+      for DensePose.
+
+  Returns:
+    A tuple of
+    all_instances: A [max_detections, mask_height, mask_width] uint8 tensor
+      with a predicted foreground mask for each instance. Background is encoded
+      as 0, and foreground is encoded as a positive integer. Specific part
+      indices are encoded as 1-indexed parts (for classes that have part
+      information).
+    surface_coords: A [max_detections, mask_height, mask_width, 2]
+      float32 tensor with (v, u) coordinates, for each part.
+  """
+  (boxes, classes, masks, part_heatmap, surf_coords, true_height,
+   true_width) = elems
+  # Boxes are in normalized coordinates relative to true image shapes. Convert
+  # coordinates to be normalized relative to input image shapes (since masks
+  # may still have padding).
+  # Then crop and resize each mask.
-
-  def crop_and_threshold_masks(args):
-    """Crops masks based on detection boxes."""
-    boxes, classes, masks, true_height, true_width = args
-    boxlist = box_list.BoxList(boxes)
-    y_scale = true_height / input_height
-    x_scale = true_width / input_width
-    boxlist = box_list_ops.scale(boxlist, y_scale, x_scale)
-    boxes = boxlist.get()
-    # Convert masks from [input_height, input_width, num_classes] to
-    # [num_classes, input_height, input_width, 1].
-    masks_4d = tf.transpose(masks, perm=[2, 0, 1])[:, :, :, tf.newaxis]
-    cropped_masks = tf2.image.crop_and_resize(
-        masks_4d,
-        boxes=boxes,
-        box_indices=classes,
-        crop_size=[mask_height, mask_width],
-        method='bilinear')
-    masks_3d = tf.squeeze(cropped_masks, axis=3)
-    masks_binarized = tf.math.greater_equal(masks_3d, score_threshold)
-    return tf.cast(masks_binarized, tf.uint8)
+  boxlist = box_list.BoxList(boxes)
+  y_scale = true_height / input_height
+  x_scale = true_width / input_width
+  boxlist = box_list_ops.scale(boxlist, y_scale, x_scale)
+  boxes = boxlist.get()
+  # Convert masks from [output_height, output_width, num_classes] to
+  # [num_classes, output_height, output_width, 1].
+  num_classes = tf.shape(masks)[-1]
+  masks_4d = tf.transpose(masks, perm=[2, 0, 1])[:, :, :, tf.newaxis]
+  # Tile part and surface coordinate masks for all classes.
+  part_heatmap_4d = tf.tile(part_heatmap[tf.newaxis, :, :, :],
+                            multiples=[num_classes, 1, 1, 1])
+  surf_coords_4d = tf.tile(surf_coords[tf.newaxis, :, :, :],
+                           multiples=[num_classes, 1, 1, 1])
+  feature_maps_concat = tf.concat([masks_4d, part_heatmap_4d, surf_coords_4d],
+                                  axis=-1)
+  # The following tensor has shape
+  # [max_detections, mask_height, mask_width, 1 + 3 * num_parts].
+  cropped_masks = tf2.image.crop_and_resize(
+      feature_maps_concat,
+      boxes=boxes,
+      box_indices=classes,
+      crop_size=[mask_height, mask_width],
+      method='bilinear')
+
+  # Split the cropped masks back into instance masks, part masks, and surface
+  # coordinates.
+  num_parts = tf.shape(part_heatmap)[-1]
+  instance_masks, part_heatmap_cropped, surface_coords_cropped = tf.split(
+      cropped_masks, [1, num_parts, 2 * num_parts], axis=-1)
+
+  # Threshold the instance masks. Resulting tensor has shape
+  # [max_detections, mask_height, mask_width, 1].
+  instance_masks_int = tf.cast(
+      tf.math.greater_equal(instance_masks, score_threshold), dtype=tf.int32)
+
+  # Produce a binary mask that is 1.0 only:
+  #   - in the foreground region for an instance
+  #   - in detections corresponding to the DensePose class
+  det_with_parts = tf.equal(classes, densepose_class_index)
+  det_with_parts = tf.cast(
+      tf.reshape(det_with_parts, [-1, 1, 1, 1]), dtype=tf.int32)
+  instance_masks_with_parts = tf.math.multiply(instance_masks_int,
+                                               det_with_parts)
+
+  # Similarly, produce a binary mask that holds the foreground masks only for
+  # instances without parts (i.e. non-DensePose classes).
+  det_without_parts = 1 - det_with_parts
+  instance_masks_without_parts = tf.math.multiply(instance_masks_int,
+                                                  det_without_parts)
+
+  # Assemble a tensor that has standard instance segmentation masks for
+  # non-DensePose classes (with values in [0, 1]), and part segmentation masks
+  # for DensePose classes (with values in [0, 1, ..., num_parts]).
+  part_mask_int_zero_indexed = tf.math.argmax(
+      part_heatmap_cropped, axis=-1, output_type=tf.int32)[:, :, :, tf.newaxis]
+  part_mask_int_one_indexed = part_mask_int_zero_indexed + 1
+  all_instances = (instance_masks_without_parts +
+                   instance_masks_with_parts * part_mask_int_one_indexed)
+
+  # Gather the surface coordinates for the parts.
+  surface_coords_cropped = tf.reshape(
+      surface_coords_cropped, [-1, mask_height, mask_width, num_parts, 2])
+  surface_coords = gather_surface_coords_for_parts(surface_coords_cropped,
+                                                   part_mask_int_zero_indexed)
+  surface_coords = (
+      surface_coords * tf.cast(instance_masks_with_parts, tf.float32))
+
+  return [tf.squeeze(all_instances, axis=3), surface_coords]
+
+
+def gather_surface_coords_for_parts(surface_coords_cropped,
+                                    highest_scoring_part):
+  """Gathers the (v, u) coordinates for the highest scoring DensePose parts.
-
-  true_heights, true_widths, _ = tf.unstack(true_image_shapes, axis=1)
-  masks_for_image = shape_utils.static_or_dynamic_map_fn(
-      crop_and_threshold_masks,
-      elems=[boxes, classes, masks, true_heights, true_widths],
-      dtype=tf.uint8,
-      back_prop=False)
-  masks = tf.stack(masks_for_image, axis=0)
-
-  return masks
+
+  Args:
+    surface_coords_cropped: A [max_detections, height, width, num_parts, 2]
+      float32 tensor with (v, u) surface coordinates.
+    highest_scoring_part: A [max_detections, height, width] integer tensor with
+      the highest scoring part (0-indexed) indices for each location.
+
+  Returns:
+    A [max_detections, height, width, 2] float32 tensor with the (v, u)
+    coordinates selected from the highest scoring parts.
+  """
+  max_detections, height, width, num_parts, _ = (
+      shape_utils.combined_static_and_dynamic_shape(surface_coords_cropped))
+  flattened_surface_coords = tf.reshape(surface_coords_cropped, [-1, 2])
+  flattened_part_ids = tf.reshape(highest_scoring_part, [-1])
+
+  # Produce lookup indices that represent the locations of the highest scoring
+  # parts in the `flattened_surface_coords` tensor.
+  flattened_lookup_indices = (
+      num_parts * tf.range(max_detections * height * width) +
+      flattened_part_ids)
+
+  vu_coords_flattened = tf.gather(flattened_surface_coords,
+                                  flattened_lookup_indices, axis=0)
+  return tf.reshape(vu_coords_flattened, [max_detections, height, width, 2])
+

 class ObjectDetectionParams(
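The index arithmetic in gather_surface_coords_for_parts is easy to check in isolation. The following standalone sketch (tiny made-up shapes, not part of the commit) mirrors the flattened lookup: for flattened pixel p and selected part k, the (v, u) pair sits at row p * num_parts + k of the flattened coordinate tensor.

import numpy as np

# Tiny shapes, made-up values; mirrors the tf.gather-based lookup above.
max_detections, height, width, num_parts = 1, 2, 2, 3
surface_coords = np.arange(
    max_detections * height * width * num_parts * 2,
    dtype=np.float32).reshape(max_detections, height, width, num_parts, 2)
highest_scoring_part = np.array([[[2, 0], [1, 2]]])   # part id per pixel

flat_coords = surface_coords.reshape(-1, 2)           # [N * num_parts, 2]
flat_parts = highest_scoring_part.reshape(-1)         # [N]
lookup = num_parts * np.arange(flat_parts.size) + flat_parts
vu = flat_coords[lookup].reshape(max_detections, height, width, 2)
# vu[0, y, x] == surface_coords[0, y, x, highest_scoring_part[0, y, x]]
assert np.array_equal(vu[0, 0, 0], surface_coords[0, 0, 0, 2])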
@@ -1235,6 +1393,64 @@ class MaskParams(
                               score_threshold, heatmap_bias_init)


+class DensePoseParams(
+    collections.namedtuple('DensePoseParams', [
+        'class_id', 'classification_loss', 'localization_loss',
+        'part_loss_weight', 'coordinate_loss_weight', 'num_parts',
+        'task_loss_weight', 'upsample_to_input_res', 'upsample_method',
+        'heatmap_bias_init'
+    ])):
+  """Namedtuple to store DensePose prediction related parameters."""
+
+  __slots__ = ()
+
+  def __new__(cls,
+              class_id,
+              classification_loss,
+              localization_loss,
+              part_loss_weight=1.0,
+              coordinate_loss_weight=1.0,
+              num_parts=24,
+              task_loss_weight=1.0,
+              upsample_to_input_res=True,
+              upsample_method='bilinear',
+              heatmap_bias_init=-2.19):
+    """Constructor with default values for DensePoseParams.
+
+    Args:
+      class_id: the ID of the class that contains the DensePose groundtruth.
+        This should typically correspond to the "person" class. Note that the
+        ID is 0-based, meaning that class 0 corresponds to the first
+        non-background object class.
+      classification_loss: an object_detection.core.losses.Loss object to
+        compute the loss for the body part predictions in CenterNet.
+      localization_loss: an object_detection.core.losses.Loss object to compute
+        the loss for the surface coordinate regression in CenterNet.
+      part_loss_weight: The loss weight to apply to part prediction.
+      coordinate_loss_weight: The loss weight to apply to surface coordinate
+        prediction.
+      num_parts: The number of DensePose parts to predict.
+      task_loss_weight: float, the loss weight for the DensePose task.
+      upsample_to_input_res: Whether to upsample the DensePose feature maps to
+        the input resolution before applying loss. Note that the prediction
+        outputs are still at the standard CenterNet output stride.
+      upsample_method: Method for upsampling DensePose feature maps. Options
+        are either 'bilinear' or 'nearest'). This takes no effect when
+        `upsample_to_input_res` is False.
+      heatmap_bias_init: float, the initial value of bias in the convolutional
+        kernel of the part prediction head. If set to None, the bias is
+        initialized with zeros.
+
+    Returns:
+      An initialized DensePoseParams namedtuple.
+    """
+    return super(DensePoseParams,
+                 cls).__new__(cls, class_id, classification_loss,
+                              localization_loss, part_loss_weight,
+                              coordinate_loss_weight, num_parts,
+                              task_loss_weight, upsample_to_input_res,
+                              upsample_method, heatmap_bias_init)
+
+
 # The following constants are used to generate the keys of the
 # (prediction, loss, target assigner,...) dictionaries used in CenterNetMetaArch
 # class.
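For reference, the new namedtuple is constructed the same way elsewhere in this commit (see get_fake_densepose_params in the test file below); a minimal sketch, assuming the core losses module is imported as in the tests:

from object_detection.core import losses

# class_id is 0-indexed and should point at the class carrying DensePose
# labels (typically "person"); the remaining fields keep their defaults.
densepose_params = DensePoseParams(
    class_id=0,
    classification_loss=losses.WeightedSoftmaxClassificationLoss(),
    localization_loss=losses.L1LocalizationLoss(),
    num_parts=24,
    upsample_to_input_res=True,
    upsample_method='bilinear')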
@@ -1247,6 +1463,9 @@ KEYPOINT_HEATMAP = 'keypoint/heatmap'
 KEYPOINT_OFFSET = 'keypoint/offset'
 SEGMENTATION_TASK = 'segmentation_task'
 SEGMENTATION_HEATMAP = 'segmentation/heatmap'
+DENSEPOSE_TASK = 'densepose_task'
+DENSEPOSE_HEATMAP = 'densepose/heatmap'
+DENSEPOSE_REGRESSION = 'densepose/regression'
 LOSS_KEY_PREFIX = 'Loss'
@@ -1290,7 +1509,8 @@ class CenterNetMetaArch(model.DetectionModel):
                object_center_params,
                object_detection_params=None,
                keypoint_params_dict=None,
-               mask_params=None):
+               mask_params=None,
+               densepose_params=None):
     """Initializes a CenterNet model.

     Args:
@@ -1318,6 +1538,10 @@ class CenterNetMetaArch(model.DetectionModel):
       mask_params: A MaskParams namedtuple. This object
         holds the hyper-parameters for segmentation. Please see the class
         definition for more details.
+      densepose_params: A DensePoseParams namedtuple. This object holds the
+        hyper-parameters for DensePose prediction. Please see the class
+        definition for more details. Note that if this is provided, it is
+        expected that `mask_params` is also provided.
     """
     assert object_detection_params or keypoint_params_dict

     # Shorten the name for convenience and better formatting.
@@ -1333,6 +1557,10 @@ class CenterNetMetaArch(model.DetectionModel):
     self._od_params = object_detection_params
     self._kp_params_dict = keypoint_params_dict
     self._mask_params = mask_params
+    if densepose_params is not None and mask_params is None:
+      raise ValueError('To run DensePose prediction, `mask_params` must also '
+                       'be supplied.')
+    self._densepose_params = densepose_params

     # Construct the prediction head nets.
     self._prediction_head_dict = self._construct_prediction_heads(
@@ -1413,8 +1641,18 @@ class CenterNetMetaArch(model.DetectionModel):
     if self._mask_params is not None:
       prediction_heads[SEGMENTATION_HEATMAP] = [
           make_prediction_net(num_classes,
-                              bias_fill=class_prediction_bias_init)
+                              bias_fill=self._mask_params.heatmap_bias_init)
           for _ in range(num_feature_outputs)]
+    if self._densepose_params is not None:
+      prediction_heads[DENSEPOSE_HEATMAP] = [
+          make_prediction_net(  # pylint: disable=g-complex-comprehension
+              self._densepose_params.num_parts,
+              bias_fill=self._densepose_params.heatmap_bias_init)
+          for _ in range(num_feature_outputs)]
+      prediction_heads[DENSEPOSE_REGRESSION] = [
+          make_prediction_net(2 * self._densepose_params.num_parts)
+          for _ in range(num_feature_outputs)
+      ]
     return prediction_heads

   def _initialize_target_assigners(self, stride, min_box_overlap_iou):
@@ -1449,6 +1687,10 @@ class CenterNetMetaArch(model.DetectionModel):
     if self._mask_params is not None:
       target_assigners[SEGMENTATION_TASK] = (
           cn_assigner.CenterNetMaskTargetAssigner(stride))
+    if self._densepose_params is not None:
+      dp_stride = 1 if self._densepose_params.upsample_to_input_res else stride
+      target_assigners[DENSEPOSE_TASK] = (
+          cn_assigner.CenterNetDensePoseTargetAssigner(dp_stride))

     return target_assigners
@@ -1860,6 +2102,113 @@ class CenterNetMetaArch(model.DetectionModel):
         float(len(segmentation_predictions)) * total_pixels_in_loss)
     return total_loss

+  def _compute_densepose_losses(self, input_height, input_width,
+                                prediction_dict):
+    """Computes the weighted DensePose losses.
+
+    Args:
+      input_height: An integer scalar tensor representing input image height.
+      input_width: An integer scalar tensor representing input image width.
+      prediction_dict: A dictionary holding predicted tensors output by the
+        "predict" function. See the "predict" function for more detailed
+        description.
+
+    Returns:
+      A dictionary of scalar float tensors representing the weighted losses for
+      the DensePose task:
+        DENSEPOSE_HEATMAP: the weighted part segmentation loss.
+        DENSEPOSE_REGRESSION: the weighted part surface coordinate loss.
+    """
+    dp_heatmap_loss, dp_regression_loss = (
+        self._compute_densepose_part_and_coordinate_losses(
+            input_height=input_height,
+            input_width=input_width,
+            part_predictions=prediction_dict[DENSEPOSE_HEATMAP],
+            surface_coord_predictions=prediction_dict[DENSEPOSE_REGRESSION]))
+    loss_dict = {}
+    loss_dict[DENSEPOSE_HEATMAP] = (
+        self._densepose_params.part_loss_weight * dp_heatmap_loss)
+    loss_dict[DENSEPOSE_REGRESSION] = (
+        self._densepose_params.coordinate_loss_weight * dp_regression_loss)
+    return loss_dict
+
+  def _compute_densepose_part_and_coordinate_losses(
+      self, input_height, input_width, part_predictions,
+      surface_coord_predictions):
+    """Computes the individual losses for the DensePose task.
+
+    Args:
+      input_height: An integer scalar tensor representing input image height.
+      input_width: An integer scalar tensor representing input image width.
+      part_predictions: A list of float tensors of shape [batch_size,
+        out_height, out_width, num_parts].
+      surface_coord_predictions: A list of float tensors of shape [batch_size,
+        out_height, out_width, 2 * num_parts].
+
+    Returns:
+      A tuple with two scalar loss tensors: part_prediction_loss and
+      surface_coord_loss.
+    """
+    gt_dp_num_points_list = self.groundtruth_lists(
+        fields.BoxListFields.densepose_num_points)
+    gt_dp_part_ids_list = self.groundtruth_lists(
+        fields.BoxListFields.densepose_part_ids)
+    gt_dp_surface_coords_list = self.groundtruth_lists(
+        fields.BoxListFields.densepose_surface_coords)
+    gt_weights_list = self.groundtruth_lists(fields.BoxListFields.weights)
+
+    assigner = self._target_assigner_dict[DENSEPOSE_TASK]
+    batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
+        assigner.assign_part_and_coordinate_targets(
+            height=input_height,
+            width=input_width,
+            gt_dp_num_points_list=gt_dp_num_points_list,
+            gt_dp_part_ids_list=gt_dp_part_ids_list,
+            gt_dp_surface_coords_list=gt_dp_surface_coords_list,
+            gt_weights_list=gt_weights_list))
+
+    part_prediction_loss = 0
+    surface_coord_loss = 0
+    classification_loss_fn = self._densepose_params.classification_loss
+    localization_loss_fn = self._densepose_params.localization_loss
+    num_predictions = float(len(part_predictions))
+    num_valid_points = tf.math.count_nonzero(batch_weights)
+    num_valid_points = tf.cast(tf.math.maximum(num_valid_points, 1), tf.float32)
+    for part_pred, surface_coord_pred in zip(part_predictions,
+                                             surface_coord_predictions):
+      # Potentially upsample the feature maps, so that better quality (i.e.
+      # higher res) groundtruth can be applied.
+      if self._densepose_params.upsample_to_input_res:
+        part_pred = tf.keras.layers.UpSampling2D(
+            self._stride,
+            interpolation=self._densepose_params.upsample_method)(
+                part_pred)
+        surface_coord_pred = tf.keras.layers.UpSampling2D(
+            self._stride,
+            interpolation=self._densepose_params.upsample_method)(
+                surface_coord_pred)
+      # Compute the part prediction loss.
+      part_pred = cn_assigner.get_batch_predictions_from_indices(
+          part_pred, batch_indices[:, 0:3])
+      part_prediction_loss += classification_loss_fn(
+          part_pred[:, tf.newaxis, :],
+          batch_part_ids[:, tf.newaxis, :],
+          weights=batch_weights[:, tf.newaxis, tf.newaxis])
+      # Compute the surface coordinate loss.
+      batch_size, out_height, out_width, _ = _get_shape(
+          surface_coord_pred, 4)
+      surface_coord_pred = tf.reshape(
+          surface_coord_pred, [batch_size, out_height, out_width, -1, 2])
+      surface_coord_pred = cn_assigner.get_batch_predictions_from_indices(
+          surface_coord_pred, batch_indices)
+      surface_coord_loss += localization_loss_fn(
+          surface_coord_pred,
+          batch_surface_coords,
+          weights=batch_weights[:, tf.newaxis])
+    part_prediction_loss = tf.reduce_sum(part_prediction_loss) / (
+        num_predictions * num_valid_points)
+    surface_coord_loss = tf.reduce_sum(surface_coord_loss) / (
+        num_predictions * num_valid_points)
+    return part_prediction_loss, surface_coord_loss
+
   def preprocess(self, inputs):
     outputs = shape_utils.resize_images_and_return_shapes(
         inputs, self._image_resizer_fn)
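The upsample-before-loss step relies on a stock Keras layer; a quick standalone shape check (illustrative only, not part of the diff):

import tensorflow as tf

# With the default CenterNet output stride of 4, a [2, 32, 32, 24] part
# heatmap is upsampled to the 128x128 input resolution before the loss.
part_pred = tf.zeros([2, 32, 32, 24])
part_pred_up = tf.keras.layers.UpSampling2D(
    4, interpolation='nearest')(part_pred)
assert part_pred_up.shape == (2, 128, 128, 24)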
@@ -1909,6 +2258,13 @@ class CenterNetMetaArch(model.DetectionModel):
         'segmentation/heatmap' - [optional] A list of size num_feature_outputs
           holding float tensors of size [batch_size, output_height,
           output_width, num_classes] representing the mask logits.
+        'densepose/heatmap' - [optional] A list of size num_feature_outputs
+          holding float tensors of size [batch_size, output_height,
+          output_width, num_parts] representing the mask logits for each part.
+        'densepose/regression' - [optional] A list of size num_feature_outputs
+          holding float tensors of size [batch_size, output_height,
+          output_width, 2 * num_parts] representing the DensePose surface
+          coordinate predictions.
         Note the $TASK_NAME is provided by the KeypointEstimation namedtuple
         used to differentiate between different keypoint tasks.
     """
@@ -1938,10 +2294,16 @@ class CenterNetMetaArch(model.DetectionModel):
       scope: Optional scope name.

     Returns:
-      A dictionary mapping the keys ['Loss/object_center', 'Loss/box/scale',
-      'Loss/box/offset', 'Loss/$TASK_NAME/keypoint/heatmap',
-      'Loss/$TASK_NAME/keypoint/offset',
-      'Loss/$TASK_NAME/keypoint/regression', 'Loss/segmentation/heatmap'] to
+      A dictionary mapping the keys [
+        'Loss/object_center',
+        'Loss/box/scale',  (optional)
+        'Loss/box/offset',  (optional)
+        'Loss/$TASK_NAME/keypoint/heatmap',  (optional)
+        'Loss/$TASK_NAME/keypoint/offset',  (optional)
+        'Loss/$TASK_NAME/keypoint/regression',  (optional)
+        'Loss/segmentation/heatmap',  (optional)
+        'Loss/densepose/heatmap',  (optional)
+        'Loss/densepose/regression']  (optional)
       scalar tensors corresponding to the losses for different tasks. Note the
       $TASK_NAME is provided by the KeypointEstimation namedtuple used to
       differentiate between different keypoint tasks.
@@ -1999,6 +2361,16 @@ class CenterNetMetaArch(model.DetectionModel):
         seg_losses[key] = seg_losses[key] * self._mask_params.task_loss_weight
       losses.update(seg_losses)

+    if self._densepose_params is not None:
+      densepose_losses = self._compute_densepose_losses(
+          input_height=input_height,
+          input_width=input_width,
+          prediction_dict=prediction_dict)
+      for key in densepose_losses:
+        densepose_losses[key] = (
+            densepose_losses[key] * self._densepose_params.task_loss_weight)
+      losses.update(densepose_losses)
+
     # Prepend the LOSS_KEY_PREFIX to the keys in the dictionary such that the
     # losses will be grouped together in Tensorboard.
     return dict([('%s/%s' % (LOSS_KEY_PREFIX, key), val)
@@ -2033,9 +2405,14 @@ class CenterNetMetaArch(model.DetectionModel):
         invalid keypoints have their coordinates and scores set to 0.0.
       detection_keypoint_scores: (Optional) A float tensor of shape [batch,
         max_detection, num_keypoints] with scores for each keypoint.
-      detection_masks: (Optional) An int tensor of shape [batch,
-        max_detections, mask_height, mask_width] with binarized masks for each
-        detection.
+      detection_masks: (Optional) A uint8 tensor of shape [batch,
+        max_detections, mask_height, mask_width] with masks for each
+        detection. Background is specified with 0, and foreground is specified
+        with positive integers (1 for standard instance segmentation mask, and
+        1-indexed parts for DensePose task).
+      detection_surface_coords: (Optional) A float32 tensor of shape [batch,
+        max_detection, mask_height, mask_width, 2] with DensePose surface
+        coordinates, in (v, u) format.
     """
     object_center_prob = tf.nn.sigmoid(prediction_dict[OBJECT_CENTER][-1])

     # Get x, y and channel indices corresponding to the top indices in the class
@@ -2076,14 +2453,27 @@ class CenterNetMetaArch(model.DetectionModel):
     if self._mask_params:
       masks = tf.nn.sigmoid(prediction_dict[SEGMENTATION_HEATMAP][-1])
-      instance_masks = convert_strided_predictions_to_instance_masks(
-          boxes, classes, masks, self._stride, self._mask_params.mask_height,
-          self._mask_params.mask_width, true_image_shapes,
-          self._mask_params.score_threshold)
-      postprocess_dict.update({
-          fields.DetectionResultFields.detection_masks:
-              instance_masks
-      })
+      densepose_part_heatmap, densepose_surface_coords = None, None
+      densepose_class_index = 0
+      if self._densepose_params:
+        densepose_part_heatmap = prediction_dict[DENSEPOSE_HEATMAP][-1]
+        densepose_surface_coords = prediction_dict[DENSEPOSE_REGRESSION][-1]
+        densepose_class_index = self._densepose_params.class_id
+      instance_masks, surface_coords = (
+          convert_strided_predictions_to_instance_masks(
+              boxes, classes, masks, true_image_shapes,
+              densepose_part_heatmap, densepose_surface_coords,
+              stride=self._stride,
+              mask_height=self._mask_params.mask_height,
+              mask_width=self._mask_params.mask_width,
+              score_threshold=self._mask_params.score_threshold,
+              densepose_class_index=densepose_class_index))
+      postprocess_dict[
+          fields.DetectionResultFields.detection_masks] = instance_masks
+      if self._densepose_params:
+        postprocess_dict[
+            fields.DetectionResultFields.detection_surface_coords] = (
+                surface_coords)
     return postprocess_dict

   def _postprocess_keypoints(self, prediction_dict, classes, y_indices,
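With densepose_params configured, the postprocessed output gains a surface-coordinate field alongside the masks; a hedged usage sketch (variable names are illustrative, shapes follow the docstring above):

# prediction_dict comes from model.predict().
detections = model.postprocess(prediction_dict, true_image_shapes)
masks = detections['detection_masks']               # [batch, max_det, H, W], uint8
coords = detections['detection_surface_coords']     # [batch, max_det, H, W, 2], float32
# For a DensePose-class detection, masks holds 1-indexed part ids and coords
# holds the matching (v, u) values; other classes get a plain {0, 1} mask.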
@@ -2330,17 +2720,61 @@ class CenterNetMetaArch(model.DetectionModel):
   def regularization_losses(self):
     return []

   def restore_map(self,
-                  fine_tune_checkpoint_type='classification',
+                  fine_tune_checkpoint_type='detection',
                   load_all_detection_checkpoint_vars=False):
     raise RuntimeError('CenterNetMetaArch not supported under TF1.x.')

   def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
     """Returns a map of Trackable objects to load from a foreign checkpoint.

     Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
     or Checkpoint). This enables the model to initialize based on weights from
     another task. For example, the feature extractor variables from a
     classification model can be used to bootstrap training of an object
     detector. When loading from an object detection model, the checkpoint model
     should have the same parameters as this detection model with exception of
     the num_classes parameter.

     Note that this function is intended to be used to restore Keras-based
     models when running Tensorflow 2, whereas restore_map (not implemented
     in CenterNet) is intended to be used to restore Slim-based models when
     running Tensorflow 1.x.

     TODO(jonathanhuang): Make this function consistent with other
     meta-architectures.

     Args:
       fine_tune_checkpoint_type: whether to restore from a full detection
         checkpoint (with compatible variable names) or to restore from a
         classification checkpoint for initialization prior to training.
         Valid values: `detection`, `classification`. Default 'detection'.
+        'detection': used when loading in the Hourglass model pre-trained on
+          other detection task.
+        'classification': used when loading in the ResNet model pre-trained on
+          image classification task. Note that only the image feature encoding
+          part is loaded but not those upsampling layers.
+        'fine_tune': used when loading the entire CenterNet feature extractor
+          pre-trained on other tasks. The checkpoints saved during CenterNet
+          model training can be directly loaded using this mode.

     Returns:
       A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
     """
     if fine_tune_checkpoint_type == 'classification':
       return {'feature_extractor': self._feature_extractor.get_base_model()}
-    if fine_tune_checkpoint_type == 'detection':
+    elif fine_tune_checkpoint_type == 'detection':
       return {'feature_extractor': self._feature_extractor.get_model()}
+    elif fine_tune_checkpoint_type == 'fine_tune':
+      feature_extractor_model = tf.train.Checkpoint(
+          _feature_extractor=self._feature_extractor)
+      return {'model': feature_extractor_model}
     else:
-      raise ValueError('Unknown fine tune checkpoint type - {}'.format(
+      raise ValueError('Not supported fine tune checkpoint type - {}'.format(
           fine_tune_checkpoint_type))

   def updates(self):
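A sketch of how the returned Trackable map is typically consumed when fine-tuning (the checkpoint path is a placeholder and the exact wiring inside model_lib_v2 may differ):

import tensorflow as tf

restore_map = model.restore_from_objects(fine_tune_checkpoint_type='fine_tune')
# 'fine_tune' returns {'model': tf.train.Checkpoint(...)}; wrapping the map in
# another Checkpoint lets earlier CenterNet training checkpoints load directly.
ckpt = tf.train.Checkpoint(**restore_map)
ckpt.restore('/path/to/centernet/checkpoint').expect_partial()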
research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py
@@ -266,7 +266,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
       masks_np[0, :, :3, 1] = 1  # Class 1.
       masks = tf.constant(masks_np)
       true_image_shapes = tf.constant([[6, 8, 3]])
-      instance_masks = cnma.convert_strided_predictions_to_instance_masks(
+      instance_masks, _ = cnma.convert_strided_predictions_to_instance_masks(
           boxes, classes, masks, stride=2, mask_height=2, mask_width=2,
           true_image_shapes=true_image_shapes)
       return instance_masks
@@ -289,6 +289,104 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
     ])
     np.testing.assert_array_equal(expected_instance_masks, instance_masks)

+  def test_convert_strided_predictions_raises_error_with_one_tensor(self):
+    def graph_fn():
+      boxes = tf.constant(
+          [
+              [[0.5, 0.5, 1.0, 1.0],
+               [0.0, 0.5, 0.5, 1.0],
+               [0.0, 0.0, 0.0, 0.0]],
+          ], tf.float32)
+      classes = tf.constant(
+          [
+              [0, 1, 0],
+          ], tf.int32)
+      masks_np = np.zeros((1, 4, 4, 2), dtype=np.float32)
+      masks_np[0, :, 2:, 0] = 1  # Class 0.
+      masks_np[0, :, :3, 1] = 1  # Class 1.
+      masks = tf.constant(masks_np)
+      true_image_shapes = tf.constant([[6, 8, 3]])
+      densepose_part_heatmap = tf.random.uniform([1, 4, 4, 24])
+      instance_masks, _ = cnma.convert_strided_predictions_to_instance_masks(
+          boxes, classes, masks, true_image_shapes,
+          densepose_part_heatmap=densepose_part_heatmap,
+          densepose_surface_coords=None)
+      return instance_masks
+
+    with self.assertRaises(ValueError):
+      self.execute_cpu(graph_fn, [])
+
+  def test_crop_and_threshold_masks(self):
+    boxes_np = np.array(
+        [[0., 0., 0.5, 0.5],
+         [0.25, 0.25, 1.0, 1.0]], dtype=np.float32)
+    classes_np = np.array([0, 2], dtype=np.int32)
+    masks_np = np.zeros((4, 4, _NUM_CLASSES), dtype=np.float32)
+    masks_np[0, 0, 0] = 0.8
+    masks_np[1, 1, 0] = 0.6
+    masks_np[3, 3, 2] = 0.7
+    part_heatmap_np = np.zeros((4, 4, _DENSEPOSE_NUM_PARTS), dtype=np.float32)
+    part_heatmap_np[0, 0, 4] = 1
+    part_heatmap_np[0, 0, 2] = 0.6  # Lower scoring.
+    part_heatmap_np[1, 1, 8] = 0.2
+    part_heatmap_np[3, 3, 4] = 0.5
+    surf_coords_np = np.zeros((4, 4, 2 * _DENSEPOSE_NUM_PARTS),
+                              dtype=np.float32)
+    surf_coords_np[:, :, 8:10] = 0.2, 0.9
+    surf_coords_np[:, :, 16:18] = 0.3, 0.5
+    true_height, true_width = 10, 10
+    input_height, input_width = 10, 10
+    mask_height = 4
+    mask_width = 4
+    def graph_fn():
+      elems = [
+          tf.constant(boxes_np),
+          tf.constant(classes_np),
+          tf.constant(masks_np),
+          tf.constant(part_heatmap_np),
+          tf.constant(surf_coords_np),
+          tf.constant(true_height, dtype=tf.int32),
+          tf.constant(true_width, dtype=tf.int32)
+      ]
+      part_masks, surface_coords = cnma.crop_and_threshold_masks(
+          elems, input_height, input_width, mask_height=mask_height,
+          mask_width=mask_width, densepose_class_index=0)
+      return part_masks, surface_coords
+
+    part_masks, surface_coords = self.execute_cpu(graph_fn, [])
+
+    expected_part_masks = np.zeros((2, 4, 4), dtype=np.uint8)
+    expected_part_masks[0, 0, 0] = 5  # Recall classes are 1-indexed in output.
+    expected_part_masks[0, 2, 2] = 9  # Recall classes are 1-indexed in output.
+    expected_part_masks[1, 3, 3] = 1  # Standard instance segmentation mask.
+    expected_surface_coords = np.zeros((2, 4, 4, 2), dtype=np.float32)
+    expected_surface_coords[0, 0, 0, :] = 0.2, 0.9
+    expected_surface_coords[0, 2, 2, :] = 0.3, 0.5
+    np.testing.assert_allclose(expected_part_masks, part_masks)
+    np.testing.assert_allclose(expected_surface_coords, surface_coords)
+
+  def test_gather_surface_coords_for_parts(self):
+    surface_coords_cropped_np = np.zeros((2, 5, 5, _DENSEPOSE_NUM_PARTS, 2),
+                                         dtype=np.float32)
+    surface_coords_cropped_np[0, 0, 0, 5] = 0.3, 0.4
+    surface_coords_cropped_np[0, 1, 0, 9] = 0.5, 0.6
+    highest_scoring_part_np = np.zeros((2, 5, 5), dtype=np.int32)
+    highest_scoring_part_np[0, 0, 0] = 5
+    highest_scoring_part_np[0, 1, 0] = 9
+    def graph_fn():
+      surface_coords_cropped = tf.constant(surface_coords_cropped_np,
+                                           tf.float32)
+      highest_scoring_part = tf.constant(highest_scoring_part_np, tf.int32)
+      surface_coords_gathered = cnma.gather_surface_coords_for_parts(
+          surface_coords_cropped, highest_scoring_part)
+      return surface_coords_gathered
+
+    surface_coords_gathered = self.execute_cpu(graph_fn, [])
+
+    np.testing.assert_allclose([0.3, 0.4], surface_coords_gathered[0, 0, 0])
+    np.testing.assert_allclose([0.5, 0.6], surface_coords_gathered[0, 1, 0])
+
   def test_top_k_feature_map_locations(self):
     feature_map_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
     feature_map_np[0, 2, 0, 1] = 1.0
@@ -535,6 +633,8 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
     keypoint_heatmap_np[1, 0, 1, 1] = 0.9
     keypoint_heatmap_np[1, 2, 0, 1] = 0.8

+    # Note that the keypoint offsets are now per keypoint (as opposed to
+    # keypoint agnostic, in the test test_keypoint_candidate_prediction).
     keypoint_heatmap_offsets_np = np.zeros((2, 3, 3, 4), dtype=np.float32)
     keypoint_heatmap_offsets_np[0, 0, 0] = [0.5, 0.25, 0.0, 0.0]
     keypoint_heatmap_offsets_np[0, 2, 1] = [-0.25, 0.5, 0.0, 0.0]
@@ -949,6 +1049,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
 _NUM_CLASSES = 10
 _KEYPOINT_INDICES = [0, 1, 2, 3]
 _NUM_KEYPOINTS = len(_KEYPOINT_INDICES)
+_DENSEPOSE_NUM_PARTS = 24
 _TASK_NAME = 'human_pose'
@@ -991,6 +1092,20 @@ def get_fake_mask_params():
       mask_width=4)


+def get_fake_densepose_params():
+  """Returns the fake DensePose estimation parameter namedtuple."""
+  return cnma.DensePoseParams(
+      class_id=1,
+      classification_loss=losses.WeightedSoftmaxClassificationLoss(),
+      localization_loss=losses.L1LocalizationLoss(),
+      part_loss_weight=1.0,
+      coordinate_loss_weight=1.0,
+      num_parts=_DENSEPOSE_NUM_PARTS,
+      task_loss_weight=1.0,
+      upsample_to_input_res=True,
+      upsample_method='nearest')
+
+
 def build_center_net_meta_arch(build_resnet=False):
   """Builds the CenterNet meta architecture."""
   if build_resnet:
@@ -1018,7 +1133,8 @@ def build_center_net_meta_arch(build_resnet=False):
       object_center_params=get_fake_center_params(),
       object_detection_params=get_fake_od_params(),
       keypoint_params_dict={_TASK_NAME: get_fake_kp_params()},
-      mask_params=get_fake_mask_params())
+      mask_params=get_fake_mask_params(),
+      densepose_params=get_fake_densepose_params())


 def _logit(p):
@@ -1102,6 +1218,16 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
         fake_feature_map)
     self.assertEqual((4, 128, 128, _NUM_CLASSES), output.shape)

+    # "densepose parts" head:
+    output = model._prediction_head_dict[cnma.DENSEPOSE_HEATMAP][-1](
+        fake_feature_map)
+    self.assertEqual((4, 128, 128, _DENSEPOSE_NUM_PARTS), output.shape)
+
+    # "densepose surface coordinates" head:
+    output = model._prediction_head_dict[cnma.DENSEPOSE_REGRESSION][-1](
+        fake_feature_map)
+    self.assertEqual((4, 128, 128, 2 * _DENSEPOSE_NUM_PARTS), output.shape)
+
   def test_initialize_target_assigners(self):
     model = build_center_net_meta_arch()
     assigner_dict = model._initialize_target_assigners(
@@ -1125,6 +1251,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
     self.assertIsInstance(assigner_dict[cnma.SEGMENTATION_TASK],
                           cn_assigner.CenterNetMaskTargetAssigner)

+    # DensePose estimation target assigner:
+    self.assertIsInstance(assigner_dict[cnma.DENSEPOSE_TASK],
+                          cn_assigner.CenterNetDensePoseTargetAssigner)
+
   def test_predict(self):
     """Test the predict function."""
@@ -1145,6 +1275,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
                      (2, 32, 32, 2))
     self.assertEqual(prediction_dict[cnma.SEGMENTATION_HEATMAP][0].shape,
                      (2, 32, 32, _NUM_CLASSES))
+    self.assertEqual(prediction_dict[cnma.DENSEPOSE_HEATMAP][0].shape,
+                     (2, 32, 32, _DENSEPOSE_NUM_PARTS))
+    self.assertEqual(prediction_dict[cnma.DENSEPOSE_REGRESSION][0].shape,
+                     (2, 32, 32, 2 * _DENSEPOSE_NUM_PARTS))

   def test_loss(self):
     """Test the loss function."""
@@ -1157,7 +1291,13 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
         groundtruth_keypoints_list=groundtruth_dict[
             fields.BoxListFields.keypoints],
         groundtruth_masks_list=groundtruth_dict[
-            fields.BoxListFields.masks])
+            fields.BoxListFields.masks],
+        groundtruth_dp_num_points_list=groundtruth_dict[
+            fields.BoxListFields.densepose_num_points],
+        groundtruth_dp_part_ids_list=groundtruth_dict[
+            fields.BoxListFields.densepose_part_ids],
+        groundtruth_dp_surface_coords_list=groundtruth_dict[
+            fields.BoxListFields.densepose_surface_coords])
     prediction_dict = get_fake_prediction_dict(
         input_height=16, input_width=32, stride=4)
@@ -1193,6 +1333,12 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
     self.assertGreater(
         0.01,
         loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX, cnma.SEGMENTATION_HEATMAP)])
+    self.assertGreater(
+        0.01,
+        loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX, cnma.DENSEPOSE_HEATMAP)])
+    self.assertGreater(
+        0.01,
+        loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX, cnma.DENSEPOSE_REGRESSION)])

   @parameterized.parameters(
       {'target_class_id': 1},
@@ -1230,6 +1376,14 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
     segmentation_heatmap[:, 14:18, 14:18, target_class_id] = 1.0
     segmentation_heatmap = _logit(segmentation_heatmap)

+    dp_part_ind = 4
+    dp_part_heatmap = np.zeros((1, 32, 32, _DENSEPOSE_NUM_PARTS),
+                               dtype=np.float32)
+    dp_part_heatmap[0, 14:18, 14:18, dp_part_ind] = 1.0
+    dp_part_heatmap = _logit(dp_part_heatmap)
+
+    dp_surf_coords = np.random.randn(1, 32, 32, 2 * _DENSEPOSE_NUM_PARTS)
+
     class_center = tf.constant(class_center)
     height_width = tf.constant(height_width)
     offset = tf.constant(offset)
@@ -1237,6 +1391,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
     keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32)
     keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32)
     segmentation_heatmap = tf.constant(segmentation_heatmap, dtype=tf.float32)
+    dp_part_heatmap = tf.constant(dp_part_heatmap, dtype=tf.float32)
+    dp_surf_coords = tf.constant(dp_surf_coords, dtype=tf.float32)

     prediction_dict = {
         cnma.OBJECT_CENTER: [class_center],
@@ -1249,6 +1405,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
         cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION):
             [keypoint_regression],
         cnma.SEGMENTATION_HEATMAP: [segmentation_heatmap],
+        cnma.DENSEPOSE_HEATMAP: [dp_part_heatmap],
+        cnma.DENSEPOSE_REGRESSION: [dp_surf_coords]
     }

     def graph_fn():
@@ -1271,12 +1429,13 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
     self.assertAllEqual([1, max_detection, 4, 4],
                         detections['detection_masks'].shape)
-    # There should be some section of the first mask (correspond to the only
-    # detection) with non-zero mask values.
-    self.assertGreater(
-        np.sum(detections['detection_masks'][0, 0, :, :] > 0), 0)
     # Masks should be empty for everything but the first detection.
     self.assertAllEqual(
         detections['detection_masks'][0, 1:, :, :],
         np.zeros_like(detections['detection_masks'][0, 1:, :, :]))
+    self.assertAllEqual(
+        detections['detection_surface_coords'][0, 1:, :, :],
+        np.zeros_like(detections['detection_surface_coords'][0, 1:, :, :]))

     if target_class_id == 1:
       expected_kpts_for_obj_0 = np.array(
expected_kpts_for_obj_0
,
rtol
=
1e-6
)
np
.
testing
.
assert_allclose
(
detections
[
'detection_keypoint_scores'
][
0
][
0
],
expected_kpt_scores_for_obj_0
,
rtol
=
1e-6
)
# First detection has DensePose parts.
self
.
assertSameElements
(
np
.
unique
(
detections
[
'detection_masks'
][
0
,
0
,
:,
:]),
set
([
0
,
dp_part_ind
+
1
]))
self
.
assertGreater
(
np
.
sum
(
np
.
abs
(
detections
[
'detection_surface_coords'
])),
0.0
)
else
:
# All keypoint outputs should be zeros.
np
.
testing
.
assert_allclose
(
...
...
@@ -1297,6 +1462,14 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
           detections['detection_keypoint_scores'][0][0],
           np.zeros([num_keypoints], np.float), rtol=1e-6)
+      # Binary segmentation mask.
+      self.assertSameElements(
+          np.unique(detections['detection_masks'][0, 0, :, :]),
+          set([0, 1]))
+      # No DensePose surface coordinates.
+      np.testing.assert_allclose(
+          detections['detection_surface_coords'][0, 0, :, :],
+          np.zeros_like(detections['detection_surface_coords'][0, 0, :, :]))

   def test_get_instance_indices(self):
     classes = tf.constant([[0, 1, 2, 0],
                            [2, 1, 2, 2]], dtype=tf.int32)
@@ -1353,6 +1526,17 @@ def get_fake_prediction_dict(input_height, input_width, stride):
   mask_heatmap[0, 2, 4, 1] = 1.0
   mask_heatmap = _logit(mask_heatmap)

+  densepose_heatmap = np.zeros((2, output_height, output_width,
+                                _DENSEPOSE_NUM_PARTS), dtype=np.float32)
+  densepose_heatmap[0, 2, 4, 5] = 1.0
+  densepose_heatmap = _logit(densepose_heatmap)
+
+  densepose_regression = np.zeros((2, output_height, output_width,
+                                   2 * _DENSEPOSE_NUM_PARTS), dtype=np.float32)
+  # The surface coordinate indices for part index 5 are:
+  # (5 * 2, 5 * 2 + 1), or (10, 11).
+  densepose_regression[0, 2, 4, 10:12] = 0.4, 0.7
+
   prediction_dict = {
       'preprocessed_inputs':
           tf.zeros((2, input_height, input_width, 3)),
@@ -1383,6 +1567,14 @@ def get_fake_prediction_dict(input_height, input_width, stride):
       cnma.SEGMENTATION_HEATMAP: [
           tf.constant(mask_heatmap),
           tf.constant(mask_heatmap)
       ],
+      cnma.DENSEPOSE_HEATMAP: [
+          tf.constant(densepose_heatmap),
+          tf.constant(densepose_heatmap),
+      ],
+      cnma.DENSEPOSE_REGRESSION: [
+          tf.constant(densepose_regression),
+          tf.constant(densepose_regression),
+      ]
   }
   return prediction_dict
@@ -1427,12 +1619,30 @@ def get_fake_groundtruth_dict(input_height, input_width, stride):
       tf.constant(mask),
       tf.zeros_like(mask),
   ]
+  densepose_num_points = [
+      tf.constant([1], dtype=tf.int32),
+      tf.constant([0], dtype=tf.int32),
+  ]
+  densepose_part_ids = [
+      tf.constant([[5, 0, 0]], dtype=tf.int32),
+      tf.constant([[0, 0, 0]], dtype=tf.int32),
+  ]
+  densepose_surface_coords_np = np.zeros((1, 3, 4), dtype=np.float32)
+  densepose_surface_coords_np[0, 0, :] = 0.55, 0.55, 0.4, 0.7
+  densepose_surface_coords = [
+      tf.constant(densepose_surface_coords_np),
+      tf.zeros_like(densepose_surface_coords_np)
+  ]
   groundtruth_dict = {
       fields.BoxListFields.boxes: boxes,
       fields.BoxListFields.weights: weights,
       fields.BoxListFields.classes: classes,
       fields.BoxListFields.keypoints: keypoints,
       fields.BoxListFields.masks: masks,
+      fields.BoxListFields.densepose_num_points: densepose_num_points,
+      fields.BoxListFields.densepose_part_ids: densepose_part_ids,
+      fields.BoxListFields.densepose_surface_coords: densepose_surface_coords,
       fields.InputDataFields.groundtruth_labeled_classes: labeled_classes,
   }
   return groundtruth_dict
@@ -1574,8 +1784,9 @@ class CenterNetMetaArchRestoreTest(test_case.TestCase):
     """Test restore map for a resnet backbone."""

     model = build_center_net_meta_arch(build_resnet=True)
-    restore_map = model.restore_map('classification')
-    self.assertIsInstance(restore_map['feature_extractor'], tf.keras.Model)
+    restore_from_objects_map = model.restore_from_objects('classification')
+    self.assertIsInstance(restore_from_objects_map['feature_extractor'],
+                          tf.keras.Model)


 class DummyFeatureExtractor(cnma.CenterNetFeatureExtractor):
@@ -1601,9 +1812,6 @@ class DummyFeatureExtractor(cnma.CenterNetFeatureExtractor):
   def postprocess(self):
     pass

-  def restore_map(self):
-    pass
-
   def call(self, inputs):
     batch_size, input_height, input_width, _ = inputs.shape
     fake_output = tf.ones([
research/object_detection/meta_architectures/context_rcnn_meta_arch.py

@@ -324,7 +324,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
       A float32 Tensor with shape [K, new_height, new_width, depth].
     """
     box_features = self._crop_and_resize_fn(
-        features_to_crop, proposal_boxes_normalized,
+        [features_to_crop], proposal_boxes_normalized, None,
         [self._initial_crop_size, self._initial_crop_size])

     attention_features = self._context_feature_extract_fn(
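The crop-and-resize helpers referenced in this commit now take a list of feature maps plus an optional per-box level tensor, which is why the call above wraps features_to_crop in a list and passes None for the levels. A minimal sketch under those assumptions (shapes and the returned rank are illustrative, mirroring the positional call in the diff):

import tensorflow as tf
from object_detection.utils import spatial_transform_ops as spatial_ops

features = [tf.random.uniform([1, 32, 32, 8])]   # single feature level
boxes = tf.constant([[[0.1, 0.1, 0.6, 0.6]]])    # [batch, num_boxes, 4], normalized
# Positional arguments follow the updated self._crop_and_resize_fn call:
# (list of feature maps, boxes, box_levels or None, crop size).
crops = spatial_ops.multilevel_native_crop_and_resize(
    features, boxes, None, [7, 7])
print(crops.shape)  # expected [1, 1, 7, 7, 8]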
research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py

@@ -20,8 +20,8 @@ from __future__ import print_function
 import functools
 import unittest
+from unittest import mock  # pylint: disable=g-importing-member
 from absl.testing import parameterized
-import mock
 import tensorflow.compat.v1 as tf
 import tf_slim as slim

@@ -41,7 +41,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
-from object_detection.utils import ops
+from object_detection.utils import spatial_transform_ops as spatial_ops
 from object_detection.utils import test_case
 from object_detection.utils import test_utils
 from object_detection.utils import tf_version

@@ -363,8 +363,9 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
         max_negatives_per_positive=None)

     crop_and_resize_fn = (
-        ops.matmul_crop_and_resize
-        if use_matmul_crop_and_resize else ops.native_crop_and_resize)
+        spatial_ops.multilevel_matmul_crop_and_resize
+        if use_matmul_crop_and_resize
+        else spatial_ops.multilevel_native_crop_and_resize)

     common_kwargs = {
         'is_training': is_training,
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py

@@ -261,31 +261,6 @@ class FasterRCNNKerasFeatureExtractor(object):
     """Get model that extracts second stage box classifier features."""
     pass

-  def restore_from_classification_checkpoint_fn(
-      self,
-      first_stage_feature_extractor_scope,
-      second_stage_feature_extractor_scope):
-    """Returns a map of variables to load from a foreign checkpoint.
-
-    Args:
-      first_stage_feature_extractor_scope: A scope name for the first stage
-        feature extractor.
-      second_stage_feature_extractor_scope: A scope name for the second stage
-        feature extractor.
-
-    Returns:
-      A dict mapping variable names (to load from a checkpoint) to variables in
-      the model graph.
-    """
-    variables_to_restore = {}
-    for variable in variables_helper.get_global_variables_safely():
-      for scope_name in [first_stage_feature_extractor_scope,
-                         second_stage_feature_extractor_scope]:
-        if variable.op.name.startswith(scope_name):
-          var_name = variable.op.name.replace(scope_name + '/', '')
-          variables_to_restore[var_name] = variable
-    return variables_to_restore
-

 class FasterRCNNMetaArch(model.DetectionModel):
   """Faster R-CNN Meta-architecture definition."""
@@ -1973,9 +1948,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       A float32 tensor with shape [K, new_height, new_width, depth].
     """
+    features_to_crop = [features_to_crop]
+    num_levels = len(features_to_crop)
+    box_levels = None
+    if num_levels != 1:
+      # If there are multiple levels to select, get the box levels
+      box_levels = ops.fpn_feature_levels(
+          num_levels, num_levels - 1, 1.0 / 224, proposal_boxes_normalized)
     cropped_regions = self._flatten_first_two_dimensions(
         self._crop_and_resize_fn(
-            features_to_crop, proposal_boxes_normalized,
+            features_to_crop, proposal_boxes_normalized, box_levels,
             [self._initial_crop_size, self._initial_crop_size]))
     return self._maxpool_layer(cropped_regions)
@@ -2542,8 +2524,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
         image_shape[1], image_shape[2], check_range=False).get()
     flat_cropped_gt_mask = self._crop_and_resize_fn(
-        tf.expand_dims(flat_gt_masks, -1),
-        tf.expand_dims(flat_normalized_proposals, axis=1),
+        [tf.expand_dims(flat_gt_masks, -1)],
+        tf.expand_dims(flat_normalized_proposals, axis=1), None,
         [mask_height, mask_width])
     # Without stopping gradients into cropped groundtruth masks the
     # performance with 100-padded groundtruth masks when batch size > 1 is
@@ -2572,7 +2554,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
     if second_stage_mask_loss is not None:
       mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
                               second_stage_mask_loss, name='mask_loss')
-      loss_dict[mask_loss.op.name] = mask_loss
+      loss_dict['Loss/BoxClassifierLoss/mask_loss'] = mask_loss
     return loss_dict

   def _get_mask_proposal_boxes_and_classes(
@@ -2801,6 +2783,46 @@ class FasterRCNNMetaArch(model.DetectionModel):
        variables_to_restore, include_patterns=include_patterns)
    return {var.op.name: var for var in feature_extractor_variables}

  def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
    """Returns a map of Trackable objects to load from a foreign checkpoint.

    Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
    or Checkpoint). This enables the model to initialize based on weights from
    another task. For example, the feature extractor variables from a
    classification model can be used to bootstrap training of an object
    detector. When loading from an object detection model, the checkpoint model
    should have the same parameters as this detection model with exception of
    the num_classes parameter.

    Note that this function is intended to be used to restore Keras-based
    models when running Tensorflow 2, whereas restore_map (above) is intended
    to be used to restore Slim-based models when running Tensorflow 1.x.

    Args:
      fine_tune_checkpoint_type: whether to restore from a full detection
        checkpoint (with compatible variable names) or to restore from a
        classification checkpoint for initialization prior to training.
        Valid values: `detection`, `classification`. Default 'detection'.

    Returns:
      A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
    """
    if fine_tune_checkpoint_type == 'classification':
      return {
          'feature_extractor':
              self._feature_extractor.classification_backbone
      }
    elif fine_tune_checkpoint_type == 'detection':
      fake_model = tf.train.Checkpoint(
          _feature_extractor_for_box_classifier_features=self._feature_extractor_for_box_classifier_features,
          _feature_extractor_for_proposal_features=self._feature_extractor_for_proposal_features)
      return {'model': fake_model}
    else:
      raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
          fine_tune_checkpoint_type))
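The Trackable map returned by restore_from_objects is meant to be wrapped in a tf.train.Checkpoint and restored, which is what model_lib_v2.load_fine_tune_checkpoint (further down in this commit) does. A minimal usage sketch, where `detection_model` and `checkpoint_path` are hypothetical placeholders rather than names from this diff:

import tensorflow as tf

# Sketch only: `detection_model` and `checkpoint_path` are placeholders.
restore_objects = detection_model.restore_from_objects(
    fine_tune_checkpoint_type='detection')
ckpt = tf.train.Checkpoint(**restore_objects)
# Restores matching objects and verifies that everything the model declared
# was actually found in the checkpoint.
ckpt.restore(checkpoint_path).assert_existing_objects_matched()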
  def updates(self):
    """Returns a list of update operators for this model.
...
...
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
View file @
31ca3b97
...
...
@@ -34,7 +34,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
from object_detection.protos import post_processing_pb2
from object_detection.utils import ops
from object_detection.utils import spatial_transform_ops as spatial_ops
from object_detection.utils import test_case
from object_detection.utils import test_utils
from object_detection.utils import tf_version
...
...
@@ -377,8 +377,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
        max_negatives_per_positive=None)
    crop_and_resize_fn = (
        ops.matmul_crop_and_resize
        if use_matmul_crop_and_resize else ops.native_crop_and_resize)
        spatial_ops.multilevel_matmul_crop_and_resize
        if use_matmul_crop_and_resize
        else spatial_ops.multilevel_native_crop_and_resize)

    common_kwargs = {
        'is_training': is_training,
...
...
research/object_detection/meta_architectures/ssd_meta_arch.py
View file @
31ca3b97
...
...
@@ -250,35 +250,6 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
  def call(self, inputs, **kwargs):
    return self._extract_features(inputs)

  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
    """Returns a map of variables to load from a foreign checkpoint.

    Args:
      feature_extractor_scope: A scope name for the feature extractor.

    Returns:
      A dict mapping variable names (to load from a checkpoint) to variables in
      the model graph.
    """
    variables_to_restore = {}
    if tf.executing_eagerly():
      for variable in self.variables:
        # variable.name includes ":0" at the end, but the names in the
        # checkpoint do not have the suffix ":0". So, we strip it here.
        var_name = variable.name[:-2]
        if var_name.startswith(feature_extractor_scope + '/'):
          var_name = var_name.replace(feature_extractor_scope + '/', '')
          variables_to_restore[var_name] = variable
    else:
      # b/137854499: use global_variables.
      for variable in variables_helper.get_global_variables_safely():
        var_name = variable.op.name
        if var_name.startswith(feature_extractor_scope + '/'):
          var_name = var_name.replace(feature_extractor_scope + '/', '')
          variables_to_restore[var_name] = variable
    return variables_to_restore


class SSDMetaArch(model.DetectionModel):
  """SSD Meta-architecture definition."""
...
...
@@ -508,12 +479,9 @@ class SSDMetaArch(model.DetectionModel):
      ValueError: if inputs tensor does not have type tf.float32
    """
    with tf.name_scope('Preprocessor'):
      (resized_inputs,
       true_image_shapes) = shape_utils.resize_images_and_return_shapes(
           inputs, self._image_resizer_fn)
      return (self._feature_extractor.preprocess(resized_inputs),
              true_image_shapes)
      normalized_inputs = self._feature_extractor.preprocess(inputs)
      return shape_utils.resize_images_and_return_shapes(
          normalized_inputs, self._image_resizer_fn)

  def _compute_clip_window(self, preprocessed_images, true_image_shapes):
    """Computes clip window to use during post_processing.
...
...
@@ -1295,8 +1263,8 @@ class SSDMetaArch(model.DetectionModel):
        classification checkpoint for initialization prior to training.
        Valid values: `detection`, `classification`. Default 'detection'.
      load_all_detection_checkpoint_vars: whether to load all variables (when
        `fine_tune_checkpoint_type='detection'`). If False, only variables
        within the appropriate scopes are included. Default False.
        `fine_tune_checkpoint_type` is `detection`). If False, only variables
        within the feature extractor scope are included. Default False.

    Returns:
      A dict mapping variable names (to load from a checkpoint) to variables in
...
...
@@ -1311,36 +1279,56 @@ class SSDMetaArch(model.DetectionModel):
    elif fine_tune_checkpoint_type == 'detection':
      variables_to_restore = {}
      if tf.executing_eagerly():
      for variable in variables_helper.get_global_variables_safely():
        var_name = variable.op.name
        if load_all_detection_checkpoint_vars:
          # Grab all detection vars by name
          for variable in self.variables:
            # variable.name includes ":0" at the end, but the names in the
            # checkpoint do not have the suffix ":0". So, we strip it here.
            var_name = variable.name[:-2]
            variables_to_restore[var_name] = variable
          variables_to_restore[var_name] = variable
        else:
          # Grab just the feature extractor vars by name
          for variable in self._feature_extractor.variables:
            # variable.name includes ":0" at the end, but the names in the
            # checkpoint do not have the suffix ":0". So, we strip it here.
            var_name = variable.name[:-2]
            variables_to_restore[var_name] = variable
      else:
        for variable in variables_helper.get_global_variables_safely():
          var_name = variable.op.name
          if load_all_detection_checkpoint_vars:
            if var_name.startswith(self._extract_features_scope):
              variables_to_restore[var_name] = variable
          else:
            if var_name.startswith(self._extract_features_scope):
              variables_to_restore[var_name] = variable
      return variables_to_restore
    else:
      raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
          fine_tune_checkpoint_type))

  def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
    """Returns a map of Trackable objects to load from a foreign checkpoint.

    Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
    or Checkpoint). This enables the model to initialize based on weights from
    another task. For example, the feature extractor variables from a
    classification model can be used to bootstrap training of an object
    detector. When loading from an object detection model, the checkpoint model
    should have the same parameters as this detection model with exception of
    the num_classes parameter.

    Note that this function is intended to be used to restore Keras-based
    models when running Tensorflow 2, whereas restore_map (above) is intended
    to be used to restore Slim-based models when running Tensorflow 1.x.

    Args:
      fine_tune_checkpoint_type: whether to restore from a full detection
        checkpoint (with compatible variable names) or to restore from a
        classification checkpoint for initialization prior to training.
        Valid values: `detection`, `classification`. Default 'detection'.

    Returns:
      A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
    """
    if fine_tune_checkpoint_type == 'classification':
      return {
          'feature_extractor':
              self._feature_extractor.classification_backbone
      }
    elif fine_tune_checkpoint_type == 'detection':
      fake_model = tf.train.Checkpoint(
          _feature_extractor=self._feature_extractor)
      return {'model': fake_model}
    else:
      raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
          fine_tune_checkpoint_type))

  def updates(self):
    """Returns a list of update operators for this model.
...
...
research/object_detection/metrics/coco_evaluation.py
View file @
31ca3b97
...
...
@@ -432,14 +432,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
    return eval_metric_ops


def _check_mask_type_and_value(array_name, masks):
  """Checks whether mask dtype is uint8 and the values are either 0 or 1."""
  if masks.dtype != np.uint8:
    raise ValueError('{} must be of type np.uint8. Found {}.'.format(
        array_name, masks.dtype))
  if np.any(np.logical_and(masks != 0, masks != 1)):
    raise ValueError('{} elements can only be either 0 or 1.'.format(
        array_name))


def convert_masks_to_binary(masks):
  """Converts masks to 0 or 1 and uint8 type."""
  return (masks > 0).astype(np.uint8)
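The helper above replaces the strict 0/1 mask check used elsewhere in this file: any non-zero value is now treated as foreground. A small illustrative sketch (not part of the diff) of what that binarization does to an arbitrary uint8 mask:

import numpy as np

# Illustrative only: a 2x3 mask with values other than 0/1.
masks = np.array([[0, 5, 255], [1, 0, 2]], dtype=np.uint8)
binary = (masks > 0).astype(np.uint8)
# binary is now [[0, 1, 1], [1, 0, 1]], which is why the test further below
# can submit masks filled with 5 and still expect a perfect mAP.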
class CocoKeypointEvaluator(CocoDetectionEvaluator):
...
...
@@ -952,9 +947,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
    groundtruth_instance_masks = groundtruth_dict[
        standard_fields.InputDataFields.groundtruth_instance_masks]
    _check_mask_type_and_value(
        standard_fields.InputDataFields.groundtruth_instance_masks,
        groundtruth_instance_masks)
    groundtruth_instance_masks = convert_masks_to_binary(
        groundtruth_instance_masks)
    self._groundtruth_list.extend(
        coco_tools.ExportSingleImageGroundtruthToCoco(
...
...
@@ -1013,9 +1007,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
          'are incompatible: {} vs {}'.format(groundtruth_masks_shape,
                                              detection_masks.shape))
    _check_mask_type_and_value(
        standard_fields.DetectionResultFields.detection_masks,
        detection_masks)
    detection_masks = convert_masks_to_binary(detection_masks)
    self._detection_masks_list.extend(
        coco_tools.ExportSingleImageDetectionMasksToCoco(
            image_id=image_id,
...
...
research/object_detection/metrics/coco_evaluation_test.py
View file @
31ca3b97
...
...
@@ -1424,14 +1424,16 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
        image_id='image3',
        detections_dict={
            standard_fields.DetectionResultFields.detection_boxes:
                np.array([[25., 25., 50., 50.]]),
                np.array([[25., 25., 50., 50.]]),
            standard_fields.DetectionResultFields.detection_scores:
                np.array([.8]),
                np.array([.8]),
            standard_fields.DetectionResultFields.detection_classes:
                np.array([1]),
                np.array([1]),
            standard_fields.DetectionResultFields.detection_masks:
                np.pad(np.ones([1, 25, 25], dtype=np.uint8),
                       ((0, 0), (10, 10), (10, 10)), mode='constant')
                # The value of 5 is equivalent to 1, since masks will be
                # thresholded and binarized before evaluation.
                np.pad(5 * np.ones([1, 25, 25], dtype=np.uint8),
                       ((0, 0), (10, 10), (10, 10)), mode='constant')
        })
    metrics = coco_evaluator.evaluate()
    self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
...
...
research/object_detection/metrics/oid_challenge_evaluation_utils.py
View file @
31ca3b97
...
...
@@ -136,15 +136,15 @@ def build_groundtruth_dictionary(data, class_label_map):
  dictionary = {
      standard_fields.InputDataFields.groundtruth_boxes:
          data_location[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(),
          data_location[['YMin', 'XMin', 'YMax', 'XMax']].to_numpy(),
      standard_fields.InputDataFields.groundtruth_classes:
          data_location['LabelName'].map(lambda x: class_label_map[x]
                                        ).as_matrix(),
                                        ).to_numpy(),
      standard_fields.InputDataFields.groundtruth_group_of:
          data_location['IsGroupOf'].as_matrix().astype(int),
          data_location['IsGroupOf'].to_numpy().astype(int),
      standard_fields.InputDataFields.groundtruth_image_classes:
          data_labels['LabelName'].map(lambda x: class_label_map[x]
                                      ).as_matrix(),
                                      ).to_numpy(),
  }
  if 'Mask' in data_location:
...
...
@@ -179,9 +179,9 @@ def build_predictions_dictionary(data, class_label_map):
"""
dictionary
=
{
standard_fields
.
DetectionResultFields
.
detection_classes
:
data
[
'LabelName'
].
map
(
lambda
x
:
class_label_map
[
x
]).
as_matrix
(),
data
[
'LabelName'
].
map
(
lambda
x
:
class_label_map
[
x
]).
to_numpy
(),
standard_fields
.
DetectionResultFields
.
detection_scores
:
data
[
'Score'
].
as_matrix
()
data
[
'Score'
].
to_numpy
()
}
if
'Mask'
in
data
:
...
...
@@ -192,6 +192,6 @@ def build_predictions_dictionary(data, class_label_map):
  else:
    dictionary[standard_fields.DetectionResultFields.detection_boxes] = data[[
        'YMin', 'XMin', 'YMax', 'XMax'
    ]].as_matrix()
    ]].to_numpy()

  return dictionary
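All of the .as_matrix() → .to_numpy() replacements in these metric utilities follow the standard pandas migration: DataFrame.as_matrix() was deprecated and then removed in pandas 1.0 in favor of DataFrame.to_numpy(). A small sketch, independent of this repository, of the equivalent call:

import pandas as pd

df = pd.DataFrame({'YMin': [0.1], 'XMin': [0.2], 'YMax': [0.8], 'XMax': [0.9]})
# Old, removed in pandas 1.0:  df[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix()
boxes = df[['YMin', 'XMin', 'YMax', 'XMax']].to_numpy()  # a (1, 4) ndarray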
research/object_detection/metrics/oid_vrd_challenge_evaluation_utils.py
View file @
31ca3b97
...
...
@@ -53,16 +53,16 @@ def build_groundtruth_vrd_dictionary(data, class_label_map,
  boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
  boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
                                 'XMax1']].as_matrix()
  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
                                 'XMax1']].to_numpy()
  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].to_numpy()

  labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
  labels['subject'] = data_boxes['LabelName1'].map(
      lambda x: class_label_map[x]).as_matrix()
      lambda x: class_label_map[x]).to_numpy()
  labels['object'] = data_boxes['LabelName2'].map(
      lambda x: class_label_map[x]).as_matrix()
      lambda x: class_label_map[x]).to_numpy()
  labels['relation'] = data_boxes['RelationshipLabel'].map(
      lambda x: relationship_label_map[x]).as_matrix()
      lambda x: relationship_label_map[x]).to_numpy()

  return {
      standard_fields.InputDataFields.groundtruth_boxes:
...
...
@@ -71,7 +71,7 @@ def build_groundtruth_vrd_dictionary(data, class_label_map,
          labels,
      standard_fields.InputDataFields.groundtruth_image_classes:
          data_labels['LabelName'].map(lambda x: class_label_map[x])
          .as_matrix(),
          .to_numpy(),
  }
...
...
@@ -104,16 +104,16 @@ def build_predictions_vrd_dictionary(data, class_label_map,
  boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
  boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
                                 'XMax1']].as_matrix()
  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
                                 'XMax1']].to_numpy()
  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].to_numpy()

  labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
  labels['subject'] = data_boxes['LabelName1'].map(
      lambda x: class_label_map[x]).as_matrix()
      lambda x: class_label_map[x]).to_numpy()
  labels['object'] = data_boxes['LabelName2'].map(
      lambda x: class_label_map[x]).as_matrix()
      lambda x: class_label_map[x]).to_numpy()
  labels['relation'] = data_boxes['RelationshipLabel'].map(
      lambda x: relationship_label_map[x]).as_matrix()
      lambda x: relationship_label_map[x]).to_numpy()

  return {
      standard_fields.DetectionResultFields.detection_boxes:
...
...
@@ -121,5 +121,5 @@ def build_predictions_vrd_dictionary(data, class_label_map,
      standard_fields.DetectionResultFields.detection_classes:
          labels,
      standard_fields.DetectionResultFields.detection_scores:
          data_boxes['Score'].as_matrix()
          data_boxes['Score'].to_numpy()
  }
research/object_detection/model_lib.py
View file @
31ca3b97
...
...
@@ -43,7 +43,6 @@ from object_detection.utils import visualization_utils as vis_utils
# pylint: disable=g-import-not-at-top
try:
  from tensorflow.contrib import learn as contrib_learn
  from tensorflow.contrib import tpu as contrib_tpu
except ImportError:
  # TF 2.0 doesn't ship with contrib.
  pass
...
...
@@ -94,6 +93,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
        of groundtruth boxes per image.
      'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
        tensor of keypoints (if provided in groundtruth).
      'groundtruth_dp_num_points_list': [batch_size, num_boxes] int32 tensor
        with the number of DensePose points for each instance (if provided in
        groundtruth).
      'groundtruth_dp_part_ids_list': [batch_size, num_boxes,
        max_sampled_points] int32 tensor with the part ids for each DensePose
        sampled point (if provided in groundtruth).
      'groundtruth_dp_surface_coords_list': [batch_size, num_boxes,
        max_sampled_points, 4] containing the DensePose surface coordinates for
        each sampled point (if provided in groundtruth).
      'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating
        group_of annotations (if provided in groundtruth).
      'groundtruth_labeled_classes': [batch_size, num_classes] int64
...
...
@@ -164,6 +172,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
    groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
        labeled_classes)

  if detection_model.groundtruth_has_field(
      fields.BoxListFields.densepose_num_points):
    groundtruth[input_data_fields.groundtruth_dp_num_points] = tf.stack(
        detection_model.groundtruth_lists(
            fields.BoxListFields.densepose_num_points))
  if detection_model.groundtruth_has_field(
      fields.BoxListFields.densepose_part_ids):
    groundtruth[input_data_fields.groundtruth_dp_part_ids] = tf.stack(
        detection_model.groundtruth_lists(
            fields.BoxListFields.densepose_part_ids))
  if detection_model.groundtruth_has_field(
      fields.BoxListFields.densepose_surface_coords):
    groundtruth[input_data_fields.groundtruth_dp_surface_coords] = tf.stack(
        detection_model.groundtruth_lists(
            fields.BoxListFields.densepose_surface_coords))

  groundtruth[input_data_fields.num_groundtruth_boxes] = (
      tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
  return groundtruth
...
...
@@ -219,6 +242,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
      fields.InputDataFields.groundtruth_boxes,
      fields.InputDataFields.groundtruth_keypoints,
      fields.InputDataFields.groundtruth_keypoint_visibilities,
      fields.InputDataFields.groundtruth_dp_num_points,
      fields.InputDataFields.groundtruth_dp_part_ids,
      fields.InputDataFields.groundtruth_dp_surface_coords,
      fields.InputDataFields.groundtruth_group_of,
      fields.InputDataFields.groundtruth_difficult,
      fields.InputDataFields.groundtruth_is_crowd,
...
...
@@ -269,6 +295,18 @@ def provide_groundtruth(model, labels):
  if fields.InputDataFields.groundtruth_keypoint_visibilities in labels:
    gt_keypoint_visibilities_list = labels[
        fields.InputDataFields.groundtruth_keypoint_visibilities]
  gt_dp_num_points_list = None
  if fields.InputDataFields.groundtruth_dp_num_points in labels:
    gt_dp_num_points_list = labels[
        fields.InputDataFields.groundtruth_dp_num_points]
  gt_dp_part_ids_list = None
  if fields.InputDataFields.groundtruth_dp_part_ids in labels:
    gt_dp_part_ids_list = labels[
        fields.InputDataFields.groundtruth_dp_part_ids]
  gt_dp_surface_coords_list = None
  if fields.InputDataFields.groundtruth_dp_surface_coords in labels:
    gt_dp_surface_coords_list = labels[
        fields.InputDataFields.groundtruth_dp_surface_coords]
  gt_weights_list = None
  if fields.InputDataFields.groundtruth_weights in labels:
    gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
...
...
@@ -297,13 +335,16 @@ def provide_groundtruth(model, labels):
      groundtruth_masks_list=gt_masks_list,
      groundtruth_keypoints_list=gt_keypoints_list,
      groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
      groundtruth_dp_num_points_list=gt_dp_num_points_list,
      groundtruth_dp_part_ids_list=gt_dp_part_ids_list,
      groundtruth_dp_surface_coords_list=gt_dp_surface_coords_list,
      groundtruth_weights_list=gt_weights_list,
      groundtruth_is_crowd_list=gt_is_crowd_list,
      groundtruth_group_of_list=gt_group_of_list,
      groundtruth_area_list=gt_area_list)


def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
                    postprocess_on_cpu=False):
  """Creates a model function for `Estimator`.
...
...
@@ -377,7 +418,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
    side_inputs = detection_model.get_side_inputs(features)
    if use_tpu and train_config.use_bfloat16:
      with contrib_tpu.bfloat16_scope():
      with tf.tpu.bfloat16_scope():
        prediction_dict = detection_model.predict(
            preprocessed_images,
            features[fields.InputDataFields.true_image_shape],
            **side_inputs)
...
...
@@ -392,7 +433,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
    if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
      if use_tpu and postprocess_on_cpu:
        detections = contrib_tpu.outside_compilation(
        detections = tf.tpu.outside_compilation(
            postprocess_wrapper,
            (prediction_dict,
             features[fields.InputDataFields.true_image_shape]))
...
...
@@ -468,7 +509,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
    if mode == tf.estimator.ModeKeys.TRAIN:
      if use_tpu:
        training_optimizer = contrib_tpu.CrossShardOptimizer(training_optimizer)
        training_optimizer = tf.tpu.CrossShardOptimizer(training_optimizer)

      # Optionally freeze some layers by setting their gradients to be zero.
      trainable_variables = None
...
...
@@ -588,7 +629,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
    # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
    if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
      return contrib_tpu.TPUEstimatorSpec(
      return tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
...
...
@@ -619,8 +660,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
def create_estimator_and_inputs(run_config,
                                hparams,
                                pipeline_config_path,
                                hparams=None,
                                pipeline_config_path=None,
                                config_override=None,
                                train_steps=None,
                                sample_1_of_n_eval_examples=1,
...
...
@@ -639,7 +680,7 @@ def create_estimator_and_inputs(run_config,
  Args:
    run_config: A `RunConfig`.
    hparams: A `HParams`.
    hparams: (optional) A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
      override the config from `pipeline_config_path`.
...
...
@@ -762,14 +803,14 @@ def create_estimator_and_inputs(run_config,
      model_config=model_config, predict_input_config=eval_input_configs[0])

  # Read export_to_tpu from hparams if not passed.
  if export_to_tpu is None:
  if export_to_tpu is None and hparams is not None:
    export_to_tpu = hparams.get('export_to_tpu', False)
  tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
                  use_tpu, export_to_tpu)
  model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu,
                              postprocess_on_cpu)
  if use_tpu_estimator:
    estimator = contrib_tpu.TPUEstimator(
    estimator = tf.estimator.tpu.TPUEstimator(
        model_fn=model_fn,
        train_batch_size=train_config.batch_size,
        # For each core, only batch size 1 is supported for eval.
...
...
research/object_detection/model_lib_tf2_test.py
View file @
31ca3b97
...
...
@@ -123,6 +123,9 @@ class SimpleModel(model.DetectionModel):
    return []

  def restore_map(self, *args, **kwargs):
    pass

  def restore_from_objects(self, fine_tune_checkpoint_type):
    return {'model': self}

  def preprocess(self, _):
...
@@ -174,7 +177,7 @@ class ModelCheckpointTest(tf.test.TestCase):
class IncompatibleModel(SimpleModel):

  def restore_map(self, *args, **kwargs):
  def restore_from_objects(self, *args, **kwargs):
    return {'weight': self.weight}
...
...
@@ -207,7 +210,6 @@ class CheckpointV2Test(tf.test.TestCase):
    model_lib_v2.load_fine_tune_checkpoint(
        self._model, self._ckpt_path, checkpoint_type='',
        checkpoint_version=train_pb2.CheckpointVersion.V2,
        load_all_detection_checkpoint_vars=True,
        input_dataset=self._train_input_fn(),
        unpad_groundtruth_tensors=True)
    np.testing.assert_allclose(self._model.weight.numpy(), 42)
...
...
@@ -220,7 +222,6 @@ class CheckpointV2Test(tf.test.TestCase):
      model_lib_v2.load_fine_tune_checkpoint(
          IncompatibleModel(), self._ckpt_path, checkpoint_type='',
          checkpoint_version=train_pb2.CheckpointVersion.V2,
          load_all_detection_checkpoint_vars=True,
          input_dataset=self._train_input_fn(),
          unpad_groundtruth_tensors=True)
...
...
research/object_detection/model_lib_v2.py
View file @
31ca3b97
...
...
@@ -34,7 +34,6 @@ from object_detection.protos import train_pb2
from object_detection.utils import config_util
from object_detection.utils import label_map_util
from object_detection.utils import ops
from object_detection.utils import variables_helper
from object_detection.utils import visualization_utils as vutils

# pylint: disable=g-import-not-at-top
...
...
@@ -47,13 +46,6 @@ except ImportError:
MODEL_BUILD_UTIL_MAP = model_lib.MODEL_BUILD_UTIL_MAP

### NOTE: This file is a wip.
### TODO(kaftan): Explore adding unit tests for individual methods
### TODO(kaftan): Add unit test that checks training on a single image w/
#### groundtruth, and verfiy that loss goes to zero.
#### Possibly have version that takes it as the whole train & eval dataset,
#### & verify the loss output from the eval_loop method.
### TODO(kaftan): Make sure the unit tests run in TAP presubmits or Kokoro

RESTORE_MAP_ERROR_TEMPLATE = (
    'Since we are restoring a v2 style checkpoint'
...
...
@@ -101,6 +93,12 @@ def _compute_losses_and_predictions_dicts(
instance masks for objects.
labels[fields.InputDataFields.groundtruth_keypoints] is a
float32 tensor containing keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is an int32
tensor with the number of sampled DensePose points per object.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is an int32
tensor with the DensePose part ids (0-indexed) per object.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
float32 tensor with the DensePose surface coordinates.
labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor
containing group_of annotations.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
...
...
@@ -203,6 +201,17 @@ def eager_train_step(detection_model,
labels[fields.InputDataFields.groundtruth_keypoints] is a
[batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is a
[batch_size, num_boxes] int32 tensor with the number of DensePose
sampled points per instance.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
[batch_size, num_boxes, max_sampled_points] int32 tensor with the
part ids (0-indexed) for each instance.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
[batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
surface coordinates for each point. Each surface coordinate is of the
form (y, x, v, u) where (y, x) are normalized image locations and
(v, u) are part-relative normalized surface coordinates.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
k-hot tensor of classes.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
...
...
@@ -277,14 +286,21 @@ def validate_tf_v2_checkpoint_restore_map(checkpoint_restore_map):
"""
for
key
,
value
in
checkpoint_restore_map
.
items
():
if
not
(
isinstance
(
key
,
str
)
and
isinstance
(
value
,
tf
.
Module
)):
if
not
(
isinstance
(
key
,
str
)
and
(
isinstance
(
value
,
tf
.
Module
)
or
isinstance
(
value
,
tf
.
train
.
Checkpoint
))):
raise
TypeError
(
RESTORE_MAP_ERROR_TEMPLATE
.
format
(
key
.
__class__
.
__name__
,
value
.
__class__
.
__name__
))
def
is_object_based_checkpoint
(
checkpoint_path
):
"""Returns true if `checkpoint_path` points to an object-based checkpoint."""
var_names
=
[
var
[
0
]
for
var
in
tf
.
train
.
list_variables
(
checkpoint_path
)]
return
'_CHECKPOINTABLE_OBJECT_GRAPH'
in
var_names
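Two quick, hedged illustrations of the helpers above (all variable names are made up for the example): validate_tf_v2_checkpoint_restore_map now accepts tf.train.Checkpoint values in addition to tf.Module, and is_object_based_checkpoint just looks for the object-graph entry that TF2 object-based checkpoints carry.

import tensorflow as tf

# A map like the ones produced by restore_from_objects(); passes validation.
# A non-string key or a plain tensor value would raise TypeError instead.
ok_map = {'model': tf.train.Checkpoint(v=tf.Variable(1.0))}
validate_tf_v2_checkpoint_restore_map(ok_map)

# Object-based (V2) checkpoints contain a '_CHECKPOINTABLE_OBJECT_GRAPH'
# entry, which is what is_object_based_checkpoint() checks for.
path = tf.train.Checkpoint(v=tf.Variable(1.0)).save('/tmp/example_ckpt')
assert is_object_based_checkpoint(path)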
def load_fine_tune_checkpoint(
    model, checkpoint_path, checkpoint_type, checkpoint_version,
    load_all_detection_checkpoint_vars, input_dataset,
    model, checkpoint_path, checkpoint_type, checkpoint_version, input_dataset,
    unpad_groundtruth_tensors):
  """Load a fine tuning classification or detection checkpoint.
...
...
@@ -292,8 +308,7 @@ def load_fine_tune_checkpoint(
the model by computing a dummy loss. (Models might not have built their
variables before their first execution)
It then loads a variable-name based classification or detection checkpoint
that comes from converted TF 1.x slim model checkpoints.
It then loads an object-based classification or detection checkpoint.
This method updates the model in-place and does not return a value.
...
...
@@ -306,14 +321,22 @@ def load_fine_tune_checkpoint(
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`.
checkpoint_version: train_pb2.CheckpointVersion.V1 or V2 enum indicating
whether to load checkpoints in V1 style or V2 style.
load_all_detection_checkpoint_vars: whether to load all variables (when
`fine_tune_checkpoint_type` is `detection`). If False, only variables
within the feature extractor scopes are included. Default False.
whether to load checkpoints in V1 style or V2 style. In this binary
we only support V2 style (object-based) checkpoints.
input_dataset: The tf.data Dataset the model is being trained on. Needed
to get the shapes for the dummy loss computation.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
Raises:
IOError: if `checkpoint_path` does not point at a valid object-based
checkpoint
ValueError: if `checkpoint_version` is not train_pb2.CheckpointVersion.V2
"""
  if not is_object_based_checkpoint(checkpoint_path):
    raise IOError('Checkpoint is expected to be an object-based checkpoint.')
  if checkpoint_version == train_pb2.CheckpointVersion.V1:
    raise ValueError('Checkpoint version should be V2')

  features, labels = iter(input_dataset).next()

  @tf.function
...
...
@@ -330,32 +353,24 @@ def load_fine_tune_checkpoint(
      labels)

  strategy = tf.compat.v2.distribute.get_strategy()
  strategy.experimental_run_v2(
      _dummy_computation_fn, args=(features, labels,))
  if hasattr(tf.distribute.Strategy, 'run'):
    strategy.run(
        _dummy_computation_fn, args=(features, labels,))
  else:
    strategy.experimental_run_v2(
        _dummy_computation_fn, args=(features, labels,))

  if checkpoint_version == train_pb2.CheckpointVersion.V1:
    var_map = model.restore_map(
        fine_tune_checkpoint_type=checkpoint_type,
        load_all_detection_checkpoint_vars=(
            load_all_detection_checkpoint_vars))
    available_var_map = variables_helper.get_variables_available_in_checkpoint(
        var_map, checkpoint_path, include_global_step=False)
    tf.train.init_from_checkpoint(checkpoint_path, available_var_map)
  elif checkpoint_version == train_pb2.CheckpointVersion.V2:
    restore_map = model.restore_map(
        fine_tune_checkpoint_type=checkpoint_type,
        load_all_detection_checkpoint_vars=(
            load_all_detection_checkpoint_vars))
    validate_tf_v2_checkpoint_restore_map(restore_map)
    ckpt = tf.train.Checkpoint(**restore_map)
    ckpt.restore(checkpoint_path).assert_existing_objects_matched()
  restore_from_objects_dict = model.restore_from_objects(
      fine_tune_checkpoint_type=checkpoint_type)
  validate_tf_v2_checkpoint_restore_map(restore_from_objects_dict)
  ckpt = tf.train.Checkpoint(**restore_from_objects_dict)
  ckpt.restore(checkpoint_path).assert_existing_objects_matched()


def get_filepath(strategy, filepath):
...
...
@@ -398,7 +413,7 @@ def train_loop(
    train_steps=None,
    use_tpu=False,
    save_final_config=False,
    checkpoint_every_n=1000,
    checkpoint_every_n=5000,
    checkpoint_max_to_keep=7,
    **kwargs):
  """Trains a model using eager + functions.
...
...
@@ -464,8 +479,10 @@ def train_loop(
  if kwargs['use_bfloat16']:
    tf.compat.v2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')

  load_all_detection_checkpoint_vars = (
      train_config.load_all_detection_checkpoint_vars)
  if train_config.load_all_detection_checkpoint_vars:
    raise ValueError('train_pb2.load_all_detection_checkpoint_vars '
                     'unsupported in TF2')

  config_util.update_fine_tune_checkpoint_type(train_config)
  fine_tune_checkpoint_type = train_config.fine_tune_checkpoint_type
  fine_tune_checkpoint_version = train_config.fine_tune_checkpoint_version
...
...
@@ -533,7 +550,6 @@ def train_loop(
          train_config.fine_tune_checkpoint,
          fine_tune_checkpoint_type,
          fine_tune_checkpoint_version,
          load_all_detection_checkpoint_vars,
          train_input,
          unpad_groundtruth_tensors)
...
...
@@ -570,8 +586,12 @@ def train_loop(
    def _sample_and_train(strategy, train_step_fn, data_iterator):
      features, labels = data_iterator.next()
      per_replica_losses = strategy.experimental_run_v2(
          train_step_fn, args=(features, labels))
      if hasattr(tf.distribute.Strategy, 'run'):
        per_replica_losses = strategy.run(
            train_step_fn, args=(features, labels))
      else:
        per_replica_losses = strategy.experimental_run_v2(
            train_step_fn, args=(features, labels))
      # TODO(anjalisridhar): explore if it is safe to remove the
      ## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
      return strategy.reduce(tf.distribute.ReduceOp.SUM,
...
@@ -744,28 +764,25 @@ def eager_eval_loop(
return
eval_dict
,
losses_dict
,
class_agnostic
agnostic_categories
=
label_map_util
.
create_class_agnostic_category_index
()
per_class_categories
=
label_map_util
.
create_category_index_from_labelmap
(
eval_input_config
.
label_map_path
)
keypoint_edges
=
[
(
kp
.
start
,
kp
.
end
)
for
kp
in
eval_config
.
keypoint_edge
]
for
i
,
(
features
,
labels
)
in
enumerate
(
eval_dataset
):
eval_dict
,
losses_dict
,
class_agnostic
=
compute_eval_dict
(
features
,
labels
)
if
class_agnostic
:
category_index
=
agnostic_categories
else
:
category_index
=
per_class_categories
if
i
%
100
==
0
:
tf
.
logging
.
info
(
'Finished eval step %d'
,
i
)
use_original_images
=
fields
.
InputDataFields
.
original_image
in
features
if
not
use_tpu
and
use_original_images
:
# Summary for input images.
tf
.
compat
.
v2
.
summary
.
image
(
name
=
'eval_input_images'
,
step
=
global_step
,
data
=
eval_dict
[
'original_image'
],
max_outputs
=
1
)
# Summary for prediction/groundtruth side-by-side images.
if
class_agnostic
:
category_index
=
label_map_util
.
create_class_agnostic_category_index
()
else
:
category_index
=
label_map_util
.
create_category_index_from_labelmap
(
eval_input_config
.
label_map_path
)
keypoint_edges
=
[
(
kp
.
start
,
kp
.
end
)
for
kp
in
eval_config
.
keypoint_edge
]
if
use_original_images
and
i
<
eval_config
.
num_visualizations
:
sbys_image_list
=
vutils
.
draw_side_by_side_evaluation_image
(
eval_dict
,
category_index
=
category_index
,
...
...
@@ -775,10 +792,19 @@ def eager_eval_loop(
            keypoint_edges=keypoint_edges or None)
        sbys_images = tf.concat(sbys_image_list, axis=0)
        tf.compat.v2.summary.image(
            name='eval_side_by_side',
            name='eval_side_by_side_' + str(i),
            step=global_step,
            data=sbys_images,
            max_outputs=eval_config.num_visualizations)
        if eval_util.has_densepose(eval_dict):
          dp_image_list = vutils.draw_densepose_visualizations(eval_dict)
          dp_images = tf.concat(dp_image_list, axis=0)
          tf.compat.v2.summary.image(
              name='densepose_detections_' + str(i),
              step=global_step,
              data=dp_images,
              max_outputs=eval_config.num_visualizations)

    if evaluators is None:
      if class_agnostic:
...
...
@@ -807,8 +833,10 @@ def eager_eval_loop(
    eval_metrics[loss_key] = loss_metrics[loss_key].result()

  eval_metrics = {str(k): v for k, v in eval_metrics.items()}
  tf.logging.info('Eval metrics at step %d', global_step)
  for k in eval_metrics:
    tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
    tf.logging.info('\t+ %s: %f', k, eval_metrics[k])
  return eval_metrics
...
...
@@ -826,6 +854,7 @@ def eval_continuously(
    checkpoint_dir=None,
    wait_interval=180,
    timeout=3600,
    eval_index=None,
    **kwargs):
  """Run continuous evaluation of a detection model eagerly.
...
...
@@ -855,6 +884,8 @@ def eval_continuously(
new checkpoint.
timeout: The maximum number of seconds to wait for a checkpoint. Execution
will terminate if no new checkpoints are found after these many seconds.
    eval_index: int, optional. If given, only evaluate the dataset at the given
      index.
**kwargs: Additional keyword arguments for configuration override.
"""
...
...
@@ -908,6 +939,11 @@ def eval_continuously(
        model=detection_model)
    eval_inputs.append((eval_input_config.name, next_eval_input))

  if eval_index is not None:
    eval_inputs = [eval_inputs[eval_index]]
    tf.logging.info('eval_index selected - {}'.format(eval_inputs))

  global_step = tf.compat.v2.Variable(
      0, trainable=False, dtype=tf.compat.v2.dtypes.int64)
...
...
@@ -920,7 +956,7 @@ def eval_continuously(
    for eval_name, eval_input in eval_inputs:
      summary_writer = tf.compat.v2.summary.create_file_writer(
          model_dir + '/eval' + eval_name)
          os.path.join(model_dir, 'eval', eval_name))
      with summary_writer.as_default():
        eager_eval_loop(
            detection_model,
...
...
research/object_detection/model_main.py
View file @
31ca3b97
...
...
@@ -22,7 +22,6 @@ from absl import flags
import tensorflow.compat.v1 as tf

from object_detection import model_hparams
from object_detection import model_lib

flags.DEFINE_string(
...
...
@@ -41,10 +40,6 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
    'one of every n train input examples for evaluation, '
    'where n is provided. This is only used if '
    '`eval_training_data` is True.')
flags.DEFINE_string('hparams_overrides', None, 'Hyperparameter overrides, '
                    'represented as a string containing comma-separated '
                    'hparam_name=value pairs.')
flags.DEFINE_string(
    'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
    '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
...
...
@@ -68,7 +63,6 @@ def main(unused_argv):
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
...
...
research/object_detection/model_main_tf2.py
View file @
31ca3b97
...
...
@@ -16,14 +16,6 @@
r"""Creates and runs TF2 object detection models.
##################################
NOTE: This module has not been fully tested; please bear with us while we iron
out the kinks.
##################################
When a TPU device is available, this binary uses TPUStrategy. Otherwise, it uses
GPUS with MirroredStrategy/MultiWorkerMirroredStrategy.
For local training/evaluation run:
PIPELINE_CONFIG_PATH=path/to/pipeline.config
MODEL_DIR=/tmp/model_outputs
...
...
@@ -61,6 +53,12 @@ flags.DEFINE_string(
flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an'
                     'evaluation checkpoint before exiting.')

flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
flags.DEFINE_string(
    'tpu_name',
    default=None,
    help='Name of the Cloud TPU for Cluster Resolvers.')
flags.DEFINE_integer(
    'num_workers', 1, 'When num_workers > 1, training uses '
    'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
...
...
@@ -86,7 +84,10 @@ def main(unused_argv):
        wait_interval=300, timeout=FLAGS.eval_timeout)
  else:
    if FLAGS.use_tpu:
      resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
      # TPU is automatically inferred if tpu_name is None and
      # we are running under cloud ai-platform.
      resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
          FLAGS.tpu_name)
      tf.config.experimental_connect_to_cluster(resolver)
      tf.tpu.experimental.initialize_tpu_system(resolver)
      strategy = tf.distribute.experimental.TPUStrategy(resolver)
...
...
research/object_detection/model_tpu_main.py
View file @
31ca3b97
...
...
@@ -26,18 +26,8 @@ from absl import flags
import tensorflow.compat.v1 as tf

from object_detection import model_hparams
from object_detection import model_lib

# pylint: disable=g-import-not-at-top
try:
  from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver
  from tensorflow.contrib import tpu as contrib_tpu
except ImportError:
  # TF 2.0 doesn't ship with contrib.
  pass
# pylint: enable=g-import-not-at-top

tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')

# Cloud TPU Cluster Resolvers
...
...
@@ -67,10 +57,6 @@ flags.DEFINE_string('mode', 'train',
flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
                     'this is not provided, batch size is read from training '
                     'config.')
flags.DEFINE_string('hparams_overrides', None, 'Comma-separated list of '
                    'hyperparameters to override defaults.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_boolean('eval_training_data', False,
                     'If training data should be evaluated for this job.')
...
...
@@ -99,15 +85,15 @@ def main(unused_argv):
  flags.mark_flag_as_required('pipeline_config_path')

  tpu_cluster_resolver = (
      contrib_cluster_resolver.TPUClusterResolver(
      tf.distribute.cluster_resolver.TPUClusterResolver(
          tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project))
  tpu_grpc_url = tpu_cluster_resolver.get_master()

  config = contrib_tpu.RunConfig(
  config = tf.estimator.tpu.RunConfig(
      master=tpu_grpc_url,
      evaluation_master=tpu_grpc_url,
      model_dir=FLAGS.model_dir,
      tpu_config=contrib_tpu.TPUConfig(
      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))
...
...
@@ -117,7 +103,6 @@ def main(unused_argv):
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
...
...
research/object_detection/models/bidirectional_feature_pyramid_generators.py
0 → 100644
View file @
31ca3b97
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to generate bidirectional feature pyramids based on image features.
Provides bidirectional feature pyramid network (BiFPN) generators that can be
used to build object detection feature extractors, as proposed by Tan et al.
See https://arxiv.org/abs/1911.09070 for more details.
"""
import collections
import functools
from six.moves import range
from six.moves import zip
import tensorflow as tf

from object_detection.utils import bifpn_utils


def _create_bifpn_input_config(fpn_min_level,
                               fpn_max_level,
                               input_max_level,
                               level_scales=None):
  """Creates a BiFPN input config for the input levels from a backbone network.

  Args:
    fpn_min_level: the minimum pyramid level (highest feature map resolution) to
      use in the BiFPN.
    fpn_max_level: the maximum pyramid level (lowest feature map resolution) to
      use in the BiFPN.
    input_max_level: the maximum pyramid level that will be provided as input to
      the BiFPN. Accordingly, the BiFPN will compute additional pyramid levels
      from input_max_level, up to the desired fpn_max_level.
    level_scales: a list of pyramid level scale factors. If 'None', each level's
      scale is set to 2^level by default, which corresponds to each successive
      feature map scaling by a factor of 2.

  Returns:
    A list of dictionaries for each feature map expected as input to the BiFPN,
    where each has entries for the feature map 'name' and 'scale'.
  """
  if not level_scales:
    level_scales = [2**i for i in range(fpn_min_level, fpn_max_level + 1)]

  bifpn_input_params = []
  for i in range(fpn_min_level, min(fpn_max_level, input_max_level) + 1):
    bifpn_input_params.append({
        'name': '0_up_lvl_{}'.format(i),
        'scale': level_scales[i - fpn_min_level]
    })

  return bifpn_input_params
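To make the naming convention concrete, here is a small hedged example (parameter values chosen for illustration, not taken from this file) of what the helper above returns for the common EfficientDet-style setting of levels 3-7 with backbone features available up to level 5:

# _create_bifpn_input_config(fpn_min_level=3, fpn_max_level=7,
#                            input_max_level=5) would produce:
# [{'name': '0_up_lvl_3', 'scale': 8},
#  {'name': '0_up_lvl_4', 'scale': 16},
#  {'name': '0_up_lvl_5', 'scale': 32}]
# Levels 6 and 7 are not listed here; they are created later by
# _create_bifpn_node_config as extra '0_up_lvl_*' nodes.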
def _get_bifpn_output_node_names(fpn_min_level, fpn_max_level, node_config):
  """Returns a list of BiFPN output node names, given a BiFPN node config.

  Args:
    fpn_min_level: the minimum pyramid level (highest feature map resolution)
      used by the BiFPN.
    fpn_max_level: the maximum pyramid level (lowest feature map resolution)
      used by the BiFPN.
    node_config: the BiFPN node_config, a list of dictionaries corresponding to
      each node in the BiFPN computation graph, where each entry should have an
      associated 'name'.

  Returns:
    A list of strings corresponding to the names of the output BiFPN nodes.
  """
  num_output_nodes = fpn_max_level - fpn_min_level + 1
  return [node['name'] for node in node_config[-num_output_nodes:]]
def _create_bifpn_node_config(bifpn_num_iterations,
                              bifpn_num_filters,
                              fpn_min_level,
                              fpn_max_level,
                              input_max_level,
                              bifpn_node_params=None,
                              level_scales=None):
  """Creates a config specifying a bidirectional feature pyramid network.

  Args:
    bifpn_num_iterations: the number of top-down bottom-up feature computations
      to repeat in the BiFPN.
    bifpn_num_filters: the number of filters (channels) for every feature map
      used in the BiFPN.
    fpn_min_level: the minimum pyramid level (highest feature map resolution) to
      use in the BiFPN.
    fpn_max_level: the maximum pyramid level (lowest feature map resolution) to
      use in the BiFPN.
    input_max_level: the maximum pyramid level that will be provided as input to
      the BiFPN. Accordingly, the BiFPN will compute additional pyramid levels
      from input_max_level, up to the desired fpn_max_level.
    bifpn_node_params: If not 'None', a dictionary of additional default BiFPN
      node parameters that will be applied to all BiFPN nodes.
    level_scales: a list of pyramid level scale factors. If 'None', each level's
      scale is set to 2^level by default, which corresponds to each successive
      feature map scaling by a factor of 2.

  Returns:
    A list of dictionaries used to define nodes in the BiFPN computation graph,
    as proposed by EfficientDet, Tan et al (https://arxiv.org/abs/1911.09070).
    Each node's entry has the corresponding keys:
      name: String. The name of this node in the BiFPN. The node name follows
        the format '{bifpn_iteration}_{dn|up}_lvl_{pyramid_level}', where 'dn'
        or 'up' refers to whether the node is in the top-down or bottom-up
        portion of a single BiFPN iteration.
      scale: the scale factor for this node, by default 2^level.
      inputs: A list of names of nodes which are inputs to this node.
      num_channels: The number of channels for this node.
      combine_method: String. Name of the method used to combine input
        node feature maps, 'fast_attention' by default for nodes which have more
        than one input. Otherwise, 'None' for nodes with only one input node.
      input_op: A (partial) function which is called to construct the layers
        that will be applied to this BiFPN node's inputs. This function is
        called with the arguments:
          input_op(name, input_scale, input_num_channels, output_scale,
                   output_num_channels, conv_hyperparams, is_training,
                   freeze_batchnorm)
      post_combine_op: A (partial) function which is called to construct the
        layers that will be applied to the result of the combine operation for
        this BiFPN node. This function will be called with the arguments:
          post_combine_op(name, conv_hyperparams, is_training, freeze_batchnorm)
        If 'None', then no layers will be applied after the combine operation
        for this node.
  """
  if not level_scales:
    level_scales = [2**i for i in range(fpn_min_level, fpn_max_level + 1)]

  default_node_params = {
      'num_channels':
          bifpn_num_filters,
      'combine_method':
          'fast_attention',
      'input_op':
          functools.partial(
              _create_bifpn_resample_block, downsample_method='max_pooling'),
      'post_combine_op':
          functools.partial(
              bifpn_utils.create_conv_block,
              num_filters=bifpn_num_filters,
              kernel_size=3,
              strides=1,
              padding='SAME',
              use_separable=True,
              apply_batchnorm=True,
              apply_activation=True,
              conv_bn_act_pattern=False),
  }
  if bifpn_node_params:
    default_node_params.update(bifpn_node_params)

  bifpn_node_params = []
  # Create additional base pyramid levels not provided as input to the BiFPN.
  # Note, combine_method and post_combine_op are set to None for additional
  # base pyramid levels because they do not combine multiple input BiFPN nodes.
  for i in range(input_max_level + 1, fpn_max_level + 1):
    node_params = dict(default_node_params)
    node_params.update({
        'name': '0_up_lvl_{}'.format(i),
        'scale': level_scales[i - fpn_min_level],
        'inputs': ['0_up_lvl_{}'.format(i - 1)],
        'combine_method': None,
        'post_combine_op': None,
    })
    bifpn_node_params.append(node_params)

  for i in range(bifpn_num_iterations):
    # The first bottom-up feature pyramid (which includes the input pyramid
    # levels from the backbone network and the additional base pyramid levels)
    # is indexed at 0. So, the first top-down bottom-up pass of the BiFPN is
    # indexed from 1, and repeated for bifpn_num_iterations iterations.
    bifpn_i = i + 1

    # Create top-down nodes.
    for level_i in reversed(range(fpn_min_level, fpn_max_level)):
      inputs = []
      # BiFPN nodes in the top-down pass receive input from the corresponding
      # level from the previous BiFPN iteration's bottom-up pass, except for the
      # bottom-most (min) level node, which is computed once in the initial
      # bottom-up pass, and is afterwards only computed in each top-down pass.
      if level_i > fpn_min_level or bifpn_i == 1:
        inputs.append('{}_up_lvl_{}'.format(bifpn_i - 1, level_i))
      else:
        inputs.append('{}_dn_lvl_{}'.format(bifpn_i - 1, level_i))
      inputs.append(bifpn_node_params[-1]['name'])
      node_params = dict(default_node_params)
      node_params.update({
          'name': '{}_dn_lvl_{}'.format(bifpn_i, level_i),
          'scale': level_scales[level_i - fpn_min_level],
          'inputs': inputs
      })
      bifpn_node_params.append(node_params)

    # Create bottom-up nodes.
    for level_i in range(fpn_min_level + 1, fpn_max_level + 1):
      # BiFPN nodes in the bottom-up pass receive input from the corresponding
      # level from the preceding top-down pass, except for the top (max) level
      # which does not have a corresponding node in the top-down pass.
      inputs = ['{}_up_lvl_{}'.format(bifpn_i - 1, level_i)]
      if level_i < fpn_max_level:
        inputs.append('{}_dn_lvl_{}'.format(bifpn_i, level_i))
      inputs.append(bifpn_node_params[-1]['name'])
      node_params = dict(default_node_params)
      node_params.update({
          'name': '{}_up_lvl_{}'.format(bifpn_i, level_i),
          'scale': level_scales[level_i - fpn_min_level],
          'inputs': inputs
      })
      bifpn_node_params.append(node_params)

  return bifpn_node_params
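As a hedged illustration of the node-naming scheme described in the docstring (example values only, not code from this file), a config built with two BiFPN iterations over pyramid levels 3-5, all provided by the backbone, yields node names in this order:

# Illustrative example: 2 BiFPN iterations over pyramid levels 3..5, with all
# three levels coming from the backbone (input_max_level=5).
config = _create_bifpn_node_config(
    bifpn_num_iterations=2, bifpn_num_filters=64,
    fpn_min_level=3, fpn_max_level=5, input_max_level=5)
print([node['name'] for node in config])
# ['1_dn_lvl_4', '1_dn_lvl_3', '1_up_lvl_4', '1_up_lvl_5',
#  '2_dn_lvl_4', '2_dn_lvl_3', '2_up_lvl_4', '2_up_lvl_5']
# The last three ('2_dn_lvl_3', '2_up_lvl_4', '2_up_lvl_5') are what
# _get_bifpn_output_node_names() reports as the BiFPN outputs.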
def _create_bifpn_resample_block(name,
                                 input_scale,
                                 input_num_channels,
                                 output_scale,
                                 output_num_channels,
                                 conv_hyperparams,
                                 is_training,
                                 freeze_batchnorm,
                                 downsample_method=None,
                                 use_native_resize_op=False,
                                 maybe_apply_1x1_conv=True,
                                 apply_1x1_pre_sampling=True,
                                 apply_1x1_post_sampling=False):
  """Creates resample block layers for input feature maps to BiFPN nodes.

  Args:
    name: String. Name used for this block of layers.
    input_scale: Scale factor of the input feature map.
    input_num_channels: Number of channels in the input feature map.
    output_scale: Scale factor of the output feature map.
    output_num_channels: Number of channels in the output feature map.
    conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
      containing hyperparameters for convolution ops.
    is_training: Indicates whether the feature generator is in training mode.
    freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
      training or not. When training with a small batch size (e.g. 1), it is
      desirable to freeze batch norm update and use pretrained batch norm
      params.
    downsample_method: String. Method to use when downsampling feature maps.
    use_native_resize_op: Bool. Whether to use the native resize up when
      upsampling feature maps.
    maybe_apply_1x1_conv: Bool. If 'True', a 1x1 convolution will only be
      applied if the input_num_channels differs from the output_num_channels.
    apply_1x1_pre_sampling: Bool. Whether a 1x1 convolution will be applied to
      the input feature map before the up/down-sampling operation.
    apply_1x1_post_sampling: Bool. Whether a 1x1 convolution will be applied to
      the input feature map after the up/down-sampling operation.

  Returns:
    A list of layers which may be applied to the input feature maps in order to
    compute feature maps with the specified scale and number of channels.
  """
  # By default, 1x1 convolutions are only applied before sampling when the
  # number of input and output channels differ.
  if maybe_apply_1x1_conv and output_num_channels == input_num_channels:
    apply_1x1_pre_sampling = False
    apply_1x1_post_sampling = False

  apply_bn_for_resampling = True
  layers = []
  if apply_1x1_pre_sampling:
    layers.extend(
        bifpn_utils.create_conv_block(
            name=name + '1x1_pre_sample/',
            num_filters=output_num_channels,
            kernel_size=1,
            strides=1,
            padding='SAME',
            use_separable=False,
            apply_batchnorm=apply_bn_for_resampling,
            apply_activation=False,
            conv_hyperparams=conv_hyperparams,
            is_training=is_training,
            freeze_batchnorm=freeze_batchnorm))

  layers.extend(
      bifpn_utils.create_resample_feature_map_ops(input_scale, output_scale,
                                                  downsample_method,
                                                  use_native_resize_op,
                                                  conv_hyperparams, is_training,
                                                  freeze_batchnorm, name))

  if apply_1x1_post_sampling:
    layers.extend(
        bifpn_utils.create_conv_block(
            name=name + '1x1_post_sample/',
            num_filters=output_num_channels,
            kernel_size=1,
            strides=1,
            padding='SAME',
            use_separable=False,
            apply_batchnorm=apply_bn_for_resampling,
            apply_activation=False,
            conv_hyperparams=conv_hyperparams,
            is_training=is_training,
            freeze_batchnorm=freeze_batchnorm))

  return layers
def _create_bifpn_combine_op(num_inputs, name, combine_method):
  """Creates the combine operation applied to a BiFPN node's input feature maps.

  Args:
    num_inputs: The number of inputs to this combine operation.
    name: String. The name of this combine operation.
    combine_method: String. The method used to combine input feature maps.

  Returns:
    A function which may be called with a list of num_inputs feature maps
    and which will return a single feature map.
  """
  combine_op = None
  if num_inputs < 1:
    raise ValueError('Expected at least 1 input for BiFPN combine.')
  elif num_inputs == 1:
    combine_op = lambda x: x[0]
  else:
    combine_op = bifpn_utils.BiFPNCombineLayer(
        combine_method=combine_method, name=name)
  return combine_op
class
KerasBiFpnFeatureMaps
(
tf
.
keras
.
Model
):
"""Generates Keras based BiFPN feature maps from an input feature map pyramid.
A Keras model that generates multi-scale feature maps for detection by
iteratively computing top-down and bottom-up feature pyramids, as in the
EfficientDet paper by Tan et al, see arxiv.org/abs/1911.09070 for details.
"""
  def __init__(self,
               bifpn_num_iterations,
               bifpn_num_filters,
               fpn_min_level,
               fpn_max_level,
               input_max_level,
               is_training,
               conv_hyperparams,
               freeze_batchnorm,
               bifpn_node_params=None,
               name=None):
    """Constructor.

    Args:
      bifpn_num_iterations: The number of top-down bottom-up iterations.
      bifpn_num_filters: The number of filters (channels) to be used for all
        feature maps in this BiFPN.
      fpn_min_level: The minimum pyramid level (highest feature map resolution)
        to use in the BiFPN.
      fpn_max_level: The maximum pyramid level (lowest feature map resolution)
        to use in the BiFPN.
      input_max_level: The maximum pyramid level that will be provided as input
        to the BiFPN. Accordingly, the BiFPN will compute any additional
        pyramid levels from input_max_level up to the desired fpn_max_level,
        with each successive level downsampling by a scale factor of 2 by
        default.
      is_training: Indicates whether the feature generator is in training mode.
      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
        containing hyperparameters for convolution ops.
      freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
        training or not. When training with a small batch size (e.g. 1), it is
        desirable to freeze batch norm update and use pretrained batch norm
        params.
      bifpn_node_params: An optional dictionary that may be used to specify
        default parameters for BiFPN nodes, without the need to provide a
        custom bifpn_node_config. For example, if '{ combine_method: 'sum' }',
        then all BiFPN nodes will combine input feature maps by summation,
        rather than by the default fast attention method.
      name: A string name scope to assign to the model. If 'None', Keras
        will auto-generate one from the class name.
    """
    super(KerasBiFpnFeatureMaps, self).__init__(name=name)
    bifpn_node_config = _create_bifpn_node_config(
        bifpn_num_iterations, bifpn_num_filters, fpn_min_level, fpn_max_level,
        input_max_level, bifpn_node_params)
    bifpn_input_config = _create_bifpn_input_config(
        fpn_min_level, fpn_max_level, input_max_level)
    bifpn_output_node_names = _get_bifpn_output_node_names(
        fpn_min_level, fpn_max_level, bifpn_node_config)

    self.bifpn_node_config = bifpn_node_config
    self.bifpn_output_node_names = bifpn_output_node_names
    self.node_input_blocks = []
    self.node_combine_op = []
    self.node_post_combine_block = []

    all_node_params = bifpn_input_config
    all_node_names = [node['name'] for node in all_node_params]
    for node_config in bifpn_node_config:
      # Maybe transform and/or resample input feature maps.
      input_blocks = []
      for input_name in node_config['inputs']:
        if input_name not in all_node_names:
          raise ValueError(
              'Input feature map ({}) does not exist:'.format(input_name))
        input_index = all_node_names.index(input_name)
        input_params = all_node_params[input_index]
        input_block = node_config['input_op'](
            name='{}/input_{}/'.format(node_config['name'], input_name),
            input_scale=input_params['scale'],
            input_num_channels=input_params.get('num_channels', None),
            output_scale=node_config['scale'],
            output_num_channels=node_config['num_channels'],
            conv_hyperparams=conv_hyperparams,
            is_training=is_training,
            freeze_batchnorm=freeze_batchnorm)
        input_blocks.append((input_index, input_block))

      # Combine input feature maps.
      combine_op = _create_bifpn_combine_op(
          num_inputs=len(input_blocks),
          name=(node_config['name'] + '/combine'),
          combine_method=node_config['combine_method'])

      # Post-combine layers.
      post_combine_block = []
      if node_config['post_combine_op']:
        post_combine_block.extend(node_config['post_combine_op'](
            name=node_config['name'] + '/post_combine/',
            conv_hyperparams=conv_hyperparams,
            is_training=is_training,
            freeze_batchnorm=freeze_batchnorm))

      self.node_input_blocks.append(input_blocks)
      self.node_combine_op.append(combine_op)
      self.node_post_combine_block.append(post_combine_block)
      all_node_params.append(node_config)
      all_node_names.append(node_config['name'])
  def call(self, feature_pyramid):
    """Compute BiFPN feature maps from input feature pyramid.

    Executed when calling the `.__call__` method on input.

    Args:
      feature_pyramid: list of tuples of (tensor_name, image_feature_tensor).

    Returns:
      feature_maps: an OrderedDict mapping keys (feature map names) to
        tensors where each tensor has shape [batch, height_i, width_i,
        depth_i].
    """
    feature_maps = [el[1] for el in feature_pyramid]
    output_feature_maps = [None for node in self.bifpn_output_node_names]

    for index, node in enumerate(self.bifpn_node_config):
      node_scope = 'node_{:02d}'.format(index)
      with tf.name_scope(node_scope):
        # Apply layer blocks to this node's input feature maps.
        input_block_results = []
        for input_index, input_block in self.node_input_blocks[index]:
          block_result = feature_maps[input_index]
          for layer in input_block:
            block_result = layer(block_result)
          input_block_results.append(block_result)

        # Combine the resulting feature maps.
        node_result = self.node_combine_op[index](input_block_results)

        # Apply post-combine layer block if applicable.
        for layer in self.node_post_combine_block[index]:
          node_result = layer(node_result)

        feature_maps.append(node_result)
        if node['name'] in self.bifpn_output_node_names:
          index = self.bifpn_output_node_names.index(node['name'])
          output_feature_maps[index] = node_result

    return collections.OrderedDict(
        zip(self.bifpn_output_node_names, output_feature_maps))
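# Usage sketch (illustrative only; the input names, tensors, and the
# conv_hyperparams object below are assumptions chosen to mirror the tests in
# bidirectional_feature_pyramid_generators_tf2_test.py):
#
#   bifpn = KerasBiFpnFeatureMaps(
#       bifpn_num_iterations=2, bifpn_num_filters=128, fpn_min_level=3,
#       fpn_max_level=7, input_max_level=5, is_training=True,
#       conv_hyperparams=conv_hyperparams, freeze_batchnorm=False)
#   feature_maps = bifpn([('block3', c3), ('block4', c4), ('block5', c5)])
#   # feature_maps is an OrderedDict with one entry per output pyramid level,
#   # keyed by output node names such as '2_dn_lvl_3' through '2_up_lvl_7'.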
research/object_detection/models/bidirectional_feature_pyramid_generators_tf2_test.py
0 → 100644
View file @
31ca3b97
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for bidirectional feature pyramid generators."""
import unittest
from absl.testing import parameterized
import tensorflow.compat.v1 as tf

from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import bidirectional_feature_pyramid_generators as bifpn_generators
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
from object_detection.utils import test_utils
from object_detection.utils import tf_version
@parameterized.parameters({'bifpn_num_iterations': 2},
                          {'bifpn_num_iterations': 8})
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class BiFPNFeaturePyramidGeneratorTest(test_case.TestCase):

  def _build_conv_hyperparams(self):
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    conv_hyperparams_text_proto = """
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
      force_use_bias: true
    """
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
  def test_get_expected_feature_map_shapes(self, bifpn_num_iterations):
    with test_utils.GraphContextOrNone() as g:
      image_features = [
          ('block3', tf.random_uniform([4, 16, 16, 256], dtype=tf.float32)),
          ('block4', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
          ('block5', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32))
      ]
      bifpn_generator = bifpn_generators.KerasBiFpnFeatureMaps(
          bifpn_num_iterations=bifpn_num_iterations,
          bifpn_num_filters=128,
          fpn_min_level=3,
          fpn_max_level=7,
          input_max_level=5,
          is_training=True,
          conv_hyperparams=self._build_conv_hyperparams(),
          freeze_batchnorm=False)

    def graph_fn():
      feature_maps = bifpn_generator(image_features)
      return feature_maps

    expected_feature_map_shapes = {
        '{}_dn_lvl_3'.format(bifpn_num_iterations): (4, 16, 16, 128),
        '{}_up_lvl_4'.format(bifpn_num_iterations): (4, 8, 8, 128),
        '{}_up_lvl_5'.format(bifpn_num_iterations): (4, 4, 4, 128),
        '{}_up_lvl_6'.format(bifpn_num_iterations): (4, 2, 2, 128),
        '{}_up_lvl_7'.format(bifpn_num_iterations): (4, 1, 1, 128)}
    out_feature_maps = self.execute(graph_fn, [], g)
    out_feature_map_shapes = dict(
        (key, value.shape) for key, value in out_feature_maps.items())
    self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
  def test_get_expected_variable_names(self, bifpn_num_iterations):
    with test_utils.GraphContextOrNone() as g:
      image_features = [
          ('block3', tf.random_uniform([4, 16, 16, 256], dtype=tf.float32)),
          ('block4', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
          ('block5', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32))
      ]
      bifpn_generator = bifpn_generators.KerasBiFpnFeatureMaps(
          bifpn_num_iterations=bifpn_num_iterations,
          bifpn_num_filters=128,
          fpn_min_level=3,
          fpn_max_level=7,
          input_max_level=5,
          is_training=True,
          conv_hyperparams=self._build_conv_hyperparams(),
          freeze_batchnorm=False,
          name='bifpn')

    def graph_fn():
      return bifpn_generator(image_features)

    self.execute(graph_fn, [], g)
    expected_variables = [
        'bifpn/node_00/0_up_lvl_6/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
        'bifpn/node_00/0_up_lvl_6/input_0_up_lvl_5/1x1_pre_sample/conv/kernel',
        'bifpn/node_03/1_dn_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
        'bifpn/node_03/1_dn_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/kernel',
        'bifpn/node_04/1_dn_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/bias',
        'bifpn/node_04/1_dn_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/kernel',
        'bifpn/node_05/1_dn_lvl_3/input_0_up_lvl_3/1x1_pre_sample/conv/bias',
        'bifpn/node_05/1_dn_lvl_3/input_0_up_lvl_3/1x1_pre_sample/conv/kernel',
        'bifpn/node_06/1_up_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/bias',
        'bifpn/node_06/1_up_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/kernel',
        'bifpn/node_07/1_up_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
        'bifpn/node_07/1_up_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/kernel']
    expected_node_variable_patterns = [
        ['bifpn/node_{:02}/{}_dn_lvl_6/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_dn_lvl_5/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_dn_lvl_4/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_dn_lvl_3/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_up_lvl_4/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_up_lvl_5/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_up_lvl_6/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_up_lvl_7/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/pointwise_kernel']]

    node_i = 2
    for iter_i in range(1, bifpn_num_iterations + 1):
      for node_variable_patterns in expected_node_variable_patterns:
        for pattern in node_variable_patterns:
          expected_variables.append(pattern.format(node_i, iter_i))
        node_i += 1

    expected_variables = set(expected_variables)
    actual_variable_set = set(
        [var.name.split(':')[0] for var in bifpn_generator.variables])
    self.assertSetEqual(expected_variables, actual_variable_set)
# TODO(aom): Tests for create_bifpn_combine_op.
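# A minimal sketch of such a test (illustrative only, not part of this file).
# It relies on _create_bifpn_combine_op returning its single input unchanged
# when num_inputs == 1; the argument values below are assumptions.
#
#   def test_combine_op_returns_single_input_unchanged(self):
#     combine_op = bifpn_generators._create_bifpn_combine_op(
#         num_inputs=1, name='combine', combine_method='sum')
#     feature_map = tf.zeros([1, 4, 4, 8])
#     self.assertIs(combine_op([feature_map]), feature_map)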
if __name__ == '__main__':
  tf.test.main()