"profiler/vscode:/vscode.git/clone" did not exist on "880fbee95782a30fb16654f830502d03dd92fae2"
Commit 31ca3b97 authored by Kaushik Shivakumar

resolve merge conflicts

parents 3e9d886d 7fcd7cba
......@@ -185,6 +185,9 @@ class FakeDetectionModel(model.DetectionModel):
"""
return {var.op.name: var for var in tf.global_variables()}
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def updates(self):
"""Returns a list of update operators for this model.
......
......@@ -924,13 +924,16 @@ def convert_strided_predictions_to_normalized_keypoints(
def convert_strided_predictions_to_instance_masks(
boxes, classes, masks, stride, mask_height, mask_width,
true_image_shapes, score_threshold=0.5):
boxes, classes, masks, true_image_shapes,
densepose_part_heatmap=None, densepose_surface_coords=None, stride=4,
mask_height=256, mask_width=256, score_threshold=0.5,
densepose_class_index=-1):
"""Converts predicted full-image masks into instance masks.
For each predicted detection box:
* Crop and resize the predicted mask based on the detected bounding box
coordinates and class prediction. Uses bilinear resampling.
* Crop and resize the predicted mask (and optionally DensePose coordinates)
based on the detected bounding box coordinates and class prediction. Uses
bilinear resampling.
* Binarize the mask using the provided score threshold.
Args:
......@@ -940,57 +943,212 @@ def convert_strided_predictions_to_instance_masks(
detected class for each box (0-indexed).
masks: A [batch, output_height, output_width, num_classes] float32
tensor with class probabilities.
true_image_shapes: A tensor of shape [batch, 3] representing the true
shape of the inputs not considering padding.
densepose_part_heatmap: (Optional) A [batch, output_height, output_width,
num_parts] float32 tensor with part scores (i.e. logits).
densepose_surface_coords: (Optional) A [batch, output_height, output_width,
2 * num_parts] float32 tensor with predicted part coordinates (in
vu-format).
stride: The stride in the output space.
mask_height: The desired resized height for instance masks.
mask_width: The desired resized width for instance masks.
true_image_shapes: A tensor of shape [batch, 3] representing the true
shape of the inputs not considering padding.
score_threshold: The threshold at which to convert predicted mask
into foreground pixels.
densepose_class_index: The class index (0-indexed) corresponding to the
class which has DensePose labels (e.g. person class).
Returns:
A [batch_size, max_detections, mask_height, mask_width] uint8 tensor with
predicted foreground mask for each instance. The masks take values in
{0, 1}.
A tuple of masks and surface_coords.
instance_masks: A [batch_size, max_detections, mask_height, mask_width]
uint8 tensor with predicted foreground mask for each
instance. If DensePose tensors are provided, then each pixel value in the
mask encodes the 1-indexed part.
surface_coords: A [batch_size, max_detections, mask_height, mask_width, 2]
float32 tensor with (v, u) coordinates. Note that v, u coordinates are
only defined on instance masks, and the coordinates at each location of
the foreground mask correspond to coordinates on a local part coordinate
system (the specific part can be inferred from the `instance_masks`
output). If DensePose feature maps are not passed to this function, this
output will be None.
Raises:
ValueError: If one but not both of `densepose_part_heatmap` and
`densepose_surface_coords` is provided.
"""
_, output_height, output_width, _ = (
batch_size, output_height, output_width, _ = (
shape_utils.combined_static_and_dynamic_shape(masks))
input_height = stride * output_height
input_width = stride * output_width
true_heights, true_widths, _ = tf.unstack(true_image_shapes, axis=1)
# If necessary, create dummy DensePose tensors to simplify the map function.
densepose_present = True
if ((densepose_part_heatmap is not None) ^
(densepose_surface_coords is not None)):
raise ValueError('To use DensePose, both `densepose_part_heatmap` and '
'`densepose_surface_coords` must be provided')
if densepose_part_heatmap is None and densepose_surface_coords is None:
densepose_present = False
densepose_part_heatmap = tf.zeros(
(batch_size, output_height, output_width, 1), dtype=tf.float32)
densepose_surface_coords = tf.zeros(
(batch_size, output_height, output_width, 2), dtype=tf.float32)
crop_and_threshold_fn = functools.partial(
crop_and_threshold_masks, input_height=input_height,
input_width=input_width, mask_height=mask_height, mask_width=mask_width,
score_threshold=score_threshold,
densepose_class_index=densepose_class_index)
instance_masks, surface_coords = shape_utils.static_or_dynamic_map_fn(
crop_and_threshold_fn,
elems=[boxes, classes, masks, densepose_part_heatmap,
densepose_surface_coords, true_heights, true_widths],
dtype=[tf.uint8, tf.float32],
back_prop=False)
surface_coords = surface_coords if densepose_present else None
return instance_masks, surface_coords
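A minimal, standalone sketch (toy tensors; `sketch_masks_from_heatmaps` is a hypothetical helper, not part of this change) of the core trick used above: transpose the class channel onto the batch axis so a single tf.image.crop_and_resize call can pick, for each detection, the mask channel of its predicted class before thresholding.
import tensorflow as tf
def sketch_masks_from_heatmaps(boxes, classes, masks, score_threshold=0.5,
                               mask_height=8, mask_width=8):
  # masks: [height, width, num_classes] class probabilities for one image.
  # Move the class axis to the batch axis so crop_and_resize can select the
  # channel matching each detection via box_indices=classes.
  masks_4d = tf.transpose(masks, perm=[2, 0, 1])[:, :, :, tf.newaxis]
  crops = tf.image.crop_and_resize(
      masks_4d, boxes=boxes, box_indices=classes,
      crop_size=[mask_height, mask_width], method='bilinear')
  # Binarize: [max_detections, mask_height, mask_width] with values in {0, 1}.
  return tf.cast(tf.squeeze(crops, axis=3) >= score_threshold, tf.uint8)
boxes = tf.constant([[0., 0., 0.5, 0.5], [0.5, 0.5, 1., 1.]])
classes = tf.constant([0, 2], dtype=tf.int32)
masks = tf.random.uniform([4, 4, 3])
print(sketch_masks_from_heatmaps(boxes, classes, masks).shape)  # (2, 8, 8)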
def crop_and_threshold_masks(elems, input_height, input_width, mask_height=256,
mask_width=256, score_threshold=0.5,
densepose_class_index=-1):
"""Crops and thresholds masks based on detection boxes.
Args:
elems: A tuple of
boxes - float32 tensor of shape [max_detections, 4]
classes - int32 tensor of shape [max_detections] (0-indexed)
masks - float32 tensor of shape [output_height, output_width, num_classes]
part_heatmap - float32 tensor of shape [output_height, output_width,
num_parts]
surf_coords - float32 tensor of shape [output_height, output_width,
2 * num_parts]
true_height - scalar int tensor
true_width - scalar int tensor
input_height: Input height to network.
input_width: Input width to network.
mask_height: Height for resizing mask crops.
mask_width: Width for resizing mask crops.
score_threshold: The threshold at which to convert predicted mask
into foreground pixels.
densepose_class_index: scalar int tensor with the class index (0-indexed)
for DensePose.
Returns:
A tuple of
all_instances: A [max_detections, mask_height, mask_width] uint8 tensor
with a predicted foreground mask for each instance. Background is encoded
as 0, and foreground is encoded as a positive integer. Specific part
indices are encoded as 1-indexed parts (for classes that have part
information).
surface_coords: A [max_detections, mask_height, mask_width, 2]
float32 tensor with (v, u) coordinates for each part.
"""
(boxes, classes, masks, part_heatmap, surf_coords, true_height,
true_width) = elems
# Boxes are in normalized coordinates relative to true image shapes. Convert
# coordinates to be normalized relative to input image shapes (since masks
# may still have padding).
# Then crop and resize each mask.
def crop_and_threshold_masks(args):
"""Crops masks based on detection boxes."""
boxes, classes, masks, true_height, true_width = args
boxlist = box_list.BoxList(boxes)
y_scale = true_height / input_height
x_scale = true_width / input_width
boxlist = box_list_ops.scale(boxlist, y_scale, x_scale)
boxes = boxlist.get()
# Convert masks from [input_height, input_width, num_classes] to
# [num_classes, input_height, input_width, 1].
masks_4d = tf.transpose(masks, perm=[2, 0, 1])[:, :, :, tf.newaxis]
cropped_masks = tf2.image.crop_and_resize(
masks_4d,
boxes=boxes,
box_indices=classes,
crop_size=[mask_height, mask_width],
method='bilinear')
masks_3d = tf.squeeze(cropped_masks, axis=3)
masks_binarized = tf.math.greater_equal(masks_3d, score_threshold)
return tf.cast(masks_binarized, tf.uint8)
boxlist = box_list.BoxList(boxes)
y_scale = true_height / input_height
x_scale = true_width / input_width
boxlist = box_list_ops.scale(boxlist, y_scale, x_scale)
boxes = boxlist.get()
# Convert masks from [output_height, output_width, num_classes] to
# [num_classes, output_height, output_width, 1].
num_classes = tf.shape(masks)[-1]
masks_4d = tf.transpose(masks, perm=[2, 0, 1])[:, :, :, tf.newaxis]
# Tile part and surface coordinate masks for all classes.
part_heatmap_4d = tf.tile(part_heatmap[tf.newaxis, :, :, :],
multiples=[num_classes, 1, 1, 1])
surf_coords_4d = tf.tile(surf_coords[tf.newaxis, :, :, :],
multiples=[num_classes, 1, 1, 1])
feature_maps_concat = tf.concat([masks_4d, part_heatmap_4d, surf_coords_4d],
axis=-1)
# The following tensor has shape
# [max_detections, mask_height, mask_width, 1 + 3 * num_parts].
cropped_masks = tf2.image.crop_and_resize(
feature_maps_concat,
boxes=boxes,
box_indices=classes,
crop_size=[mask_height, mask_width],
method='bilinear')
# Split the cropped masks back into instance masks, part masks, and surface
# coordinates.
num_parts = tf.shape(part_heatmap)[-1]
instance_masks, part_heatmap_cropped, surface_coords_cropped = tf.split(
cropped_masks, [1, num_parts, 2 * num_parts], axis=-1)
# Threshold the instance masks. Resulting tensor has shape
# [max_detections, mask_height, mask_width, 1].
instance_masks_int = tf.cast(
tf.math.greater_equal(instance_masks, score_threshold), dtype=tf.int32)
# Produce a binary mask that is 1.0 only:
# - in the foreground region for an instance
# - in detections corresponding to the DensePose class
det_with_parts = tf.equal(classes, densepose_class_index)
det_with_parts = tf.cast(
tf.reshape(det_with_parts, [-1, 1, 1, 1]), dtype=tf.int32)
instance_masks_with_parts = tf.math.multiply(instance_masks_int,
det_with_parts)
# Similarly, produce a binary mask that holds the foreground masks only for
# instances without parts (i.e. non-DensePose classes).
det_without_parts = 1 - det_with_parts
instance_masks_without_parts = tf.math.multiply(instance_masks_int,
det_without_parts)
# Assemble a tensor that has standard instance segmentation masks for
# non-DensePose classes (with values in [0, 1]), and part segmentation masks
# for DensePose classes (with values in [0, 1, ..., num_parts]).
part_mask_int_zero_indexed = tf.math.argmax(
part_heatmap_cropped, axis=-1, output_type=tf.int32)[:, :, :, tf.newaxis]
part_mask_int_one_indexed = part_mask_int_zero_indexed + 1
all_instances = (instance_masks_without_parts +
instance_masks_with_parts * part_mask_int_one_indexed)
# Gather the surface coordinates for the parts.
surface_coords_cropped = tf.reshape(
surface_coords_cropped, [-1, mask_height, mask_width, num_parts, 2])
surface_coords = gather_surface_coords_for_parts(surface_coords_cropped,
part_mask_int_zero_indexed)
surface_coords = (
surface_coords * tf.cast(instance_masks_with_parts, tf.float32))
return [tf.squeeze(all_instances, axis=3), surface_coords]
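A tiny NumPy illustration (toy values, not from this change) of how `all_instances` is assembled above: detections of non-DensePose classes keep a {0, 1} mask, while DensePose-class detections replace foreground pixels with 1-indexed part ids.
import numpy as np
instance_mask = np.array([[1, 0],
                          [1, 1]], dtype=np.int32)  # thresholded foreground
part_argmax = np.array([[4, 2],
                        [4, 7]], dtype=np.int32)    # 0-indexed best parts
is_densepose_class = 1                              # class == densepose_class_index
with_parts = instance_mask * is_densepose_class
without_parts = instance_mask * (1 - is_densepose_class)
all_instances = without_parts + with_parts * (part_argmax + 1)
print(all_instances)
# [[5 0]
#  [5 8]]  -> foreground pixels carry 1-indexed part ids; background stays 0.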
def gather_surface_coords_for_parts(surface_coords_cropped,
highest_scoring_part):
"""Gathers the (v, u) coordinates for the highest scoring DensePose parts.
true_heights, true_widths, _ = tf.unstack(true_image_shapes, axis=1)
masks_for_image = shape_utils.static_or_dynamic_map_fn(
crop_and_threshold_masks,
elems=[boxes, classes, masks, true_heights, true_widths],
dtype=tf.uint8,
back_prop=False)
masks = tf.stack(masks_for_image, axis=0)
return masks
Args:
surface_coords_cropped: A [max_detections, height, width, num_parts, 2]
float32 tensor with (v, u) surface coordinates.
highest_scoring_part: A [max_detections, height, width] integer tensor with
the highest scoring part (0-indexed) indices for each location.
Returns:
A [max_detections, height, width, 2] float32 tensor with the (v, u)
coordinates selected from the highest scoring parts.
"""
max_detections, height, width, num_parts, _ = (
shape_utils.combined_static_and_dynamic_shape(surface_coords_cropped))
flattened_surface_coords = tf.reshape(surface_coords_cropped, [-1, 2])
flattened_part_ids = tf.reshape(highest_scoring_part, [-1])
# Produce lookup indices that represent the locations of the highest scoring
# parts in the `flattened_surface_coords` tensor.
flattened_lookup_indices = (
num_parts * tf.range(max_detections * height * width) +
flattened_part_ids)
vu_coords_flattened = tf.gather(flattened_surface_coords,
flattened_lookup_indices, axis=0)
return tf.reshape(vu_coords_flattened, [max_detections, height, width, 2])
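A worked toy example (assumed values) of the flattened gather above: after reshaping to [N * num_parts, 2], the (v, u) row for pixel i's best part sits at index num_parts * i + part_id.
import numpy as np
num_parts = 3
coords = np.arange(2 * 1 * 1 * num_parts * 2).reshape(2, 1, 1, num_parts, 2)
best_part = np.array([[[2]], [[0]]])         # per-pixel argmax part index
flat_coords = coords.reshape(-1, 2)          # [N * num_parts, 2]
flat_ids = best_part.reshape(-1)
lookup = num_parts * np.arange(flat_ids.size) + flat_ids
print(flat_coords[lookup])                   # [[4 5], [6 7]]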
class ObjectDetectionParams(
......@@ -1235,6 +1393,64 @@ class MaskParams(
score_threshold, heatmap_bias_init)
class DensePoseParams(
collections.namedtuple('DensePoseParams', [
'class_id', 'classification_loss', 'localization_loss',
'part_loss_weight', 'coordinate_loss_weight', 'num_parts',
'task_loss_weight', 'upsample_to_input_res', 'upsample_method',
'heatmap_bias_init'
])):
"""Namedtuple to store DensePose prediction related parameters."""
__slots__ = ()
def __new__(cls,
class_id,
classification_loss,
localization_loss,
part_loss_weight=1.0,
coordinate_loss_weight=1.0,
num_parts=24,
task_loss_weight=1.0,
upsample_to_input_res=True,
upsample_method='bilinear',
heatmap_bias_init=-2.19):
"""Constructor with default values for DensePoseParams.
Args:
class_id: the ID of the class that contains the DensePose groundtruth.
This should typically correspond to the "person" class. Note that the ID
is 0-based, meaning that class 0 corresponds to the first non-background
object class.
classification_loss: an object_detection.core.losses.Loss object to
compute the loss for the body part predictions in CenterNet.
localization_loss: an object_detection.core.losses.Loss object to compute
the loss for the surface coordinate regression in CenterNet.
part_loss_weight: The loss weight to apply to part prediction.
coordinate_loss_weight: The loss weight to apply to surface coordinate
prediction.
num_parts: The number of DensePose parts to predict.
task_loss_weight: float, the loss weight for the DensePose task.
upsample_to_input_res: Whether to upsample the DensePose feature maps to
the input resolution before applying loss. Note that the prediction
outputs are still at the standard CenterNet output stride.
upsample_method: Method for upsampling DensePose feature maps. Options are
either 'bilinear' or 'nearest'. This has no effect when
`upsample_to_input_res` is False.
heatmap_bias_init: float, the initial value of bias in the convolutional
kernel of the part prediction head. If set to None, the
bias is initialized with zeros.
Returns:
An initialized DensePoseParams namedtuple.
"""
return super(DensePoseParams,
cls).__new__(cls, class_id, classification_loss,
localization_loss, part_loss_weight,
coordinate_loss_weight, num_parts,
task_loss_weight, upsample_to_input_res,
upsample_method, heatmap_bias_init)
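A minimal construction sketch, mirroring the defaults above and the test helper further down; the import paths follow the tests and are assumptions about this repository layout.
from object_detection.core import losses
from object_detection.meta_architectures import center_net_meta_arch as cnma
densepose_params = cnma.DensePoseParams(
    class_id=0,  # e.g. the "person" class (0-indexed)
    classification_loss=losses.WeightedSoftmaxClassificationLoss(),
    localization_loss=losses.L1LocalizationLoss(),
    num_parts=24,
    upsample_to_input_res=True,
    upsample_method='bilinear')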
# The following constants are used to generate the keys of the
# (prediction, loss, target assigner,...) dictionaries used in CenterNetMetaArch
# class.
......@@ -1247,6 +1463,9 @@ KEYPOINT_HEATMAP = 'keypoint/heatmap'
KEYPOINT_OFFSET = 'keypoint/offset'
SEGMENTATION_TASK = 'segmentation_task'
SEGMENTATION_HEATMAP = 'segmentation/heatmap'
DENSEPOSE_TASK = 'densepose_task'
DENSEPOSE_HEATMAP = 'densepose/heatmap'
DENSEPOSE_REGRESSION = 'densepose/regression'
LOSS_KEY_PREFIX = 'Loss'
......@@ -1290,7 +1509,8 @@ class CenterNetMetaArch(model.DetectionModel):
object_center_params,
object_detection_params=None,
keypoint_params_dict=None,
mask_params=None):
mask_params=None,
densepose_params=None):
"""Initializes a CenterNet model.
Args:
......@@ -1318,6 +1538,10 @@ class CenterNetMetaArch(model.DetectionModel):
mask_params: A MaskParams namedtuple. This object
holds the hyper-parameters for segmentation. Please see the class
definition for more details.
densepose_params: A DensePoseParams namedtuple. This object holds the
hyper-parameters for DensePose prediction. Please see the class
definition for more details. Note that if this is provided, it is
expected that `mask_params` is also provided.
"""
assert object_detection_params or keypoint_params_dict
# Shorten the name for convenience and better formatting.
......@@ -1333,6 +1557,10 @@ class CenterNetMetaArch(model.DetectionModel):
self._od_params = object_detection_params
self._kp_params_dict = keypoint_params_dict
self._mask_params = mask_params
if densepose_params is not None and mask_params is None:
raise ValueError('To run DensePose prediction, `mask_params` must also '
'be supplied.')
self._densepose_params = densepose_params
# Construct the prediction head nets.
self._prediction_head_dict = self._construct_prediction_heads(
......@@ -1413,8 +1641,18 @@ class CenterNetMetaArch(model.DetectionModel):
if self._mask_params is not None:
prediction_heads[SEGMENTATION_HEATMAP] = [
make_prediction_net(num_classes,
bias_fill=class_prediction_bias_init)
bias_fill=self._mask_params.heatmap_bias_init)
for _ in range(num_feature_outputs)]
if self._densepose_params is not None:
prediction_heads[DENSEPOSE_HEATMAP] = [
make_prediction_net( # pylint: disable=g-complex-comprehension
self._densepose_params.num_parts,
bias_fill=self._densepose_params.heatmap_bias_init)
for _ in range(num_feature_outputs)]
prediction_heads[DENSEPOSE_REGRESSION] = [
make_prediction_net(2 * self._densepose_params.num_parts)
for _ in range(num_feature_outputs)
]
return prediction_heads
def _initialize_target_assigners(self, stride, min_box_overlap_iou):
......@@ -1449,6 +1687,10 @@ class CenterNetMetaArch(model.DetectionModel):
if self._mask_params is not None:
target_assigners[SEGMENTATION_TASK] = (
cn_assigner.CenterNetMaskTargetAssigner(stride))
if self._densepose_params is not None:
dp_stride = 1 if self._densepose_params.upsample_to_input_res else stride
target_assigners[DENSEPOSE_TASK] = (
cn_assigner.CenterNetDensePoseTargetAssigner(dp_stride))
return target_assigners
......@@ -1860,6 +2102,113 @@ class CenterNetMetaArch(model.DetectionModel):
float(len(segmentation_predictions)) * total_pixels_in_loss)
return total_loss
def _compute_densepose_losses(self, input_height, input_width,
prediction_dict):
"""Computes the weighted DensePose losses.
Args:
input_height: An integer scalar tensor representing input image height.
input_width: An integer scalar tensor representing input image width.
prediction_dict: A dictionary holding predicted tensors output by the
"predict" function. See the "predict" function for more detailed
description.
Returns:
A dictionary of scalar float tensors representing the weighted losses for
the DensePose task:
DENSEPOSE_HEATMAP: the weighted part segmentation loss.
DENSEPOSE_REGRESSION: the weighted part surface coordinate loss.
"""
dp_heatmap_loss, dp_regression_loss = (
self._compute_densepose_part_and_coordinate_losses(
input_height=input_height,
input_width=input_width,
part_predictions=prediction_dict[DENSEPOSE_HEATMAP],
surface_coord_predictions=prediction_dict[DENSEPOSE_REGRESSION]))
loss_dict = {}
loss_dict[DENSEPOSE_HEATMAP] = (
self._densepose_params.part_loss_weight * dp_heatmap_loss)
loss_dict[DENSEPOSE_REGRESSION] = (
self._densepose_params.coordinate_loss_weight * dp_regression_loss)
return loss_dict
def _compute_densepose_part_and_coordinate_losses(
self, input_height, input_width, part_predictions,
surface_coord_predictions):
"""Computes the individual losses for the DensePose task.
Args:
input_height: An integer scalar tensor representing input image height.
input_width: An integer scalar tensor representing input image width.
part_predictions: A list of float tensors of shape [batch_size,
out_height, out_width, num_parts].
surface_coord_predictions: A list of float tensors of shape [batch_size,
out_height, out_width, 2 * num_parts].
Returns:
A tuple with two scalar loss tensors: part_prediction_loss and
surface_coord_loss.
"""
gt_dp_num_points_list = self.groundtruth_lists(
fields.BoxListFields.densepose_num_points)
gt_dp_part_ids_list = self.groundtruth_lists(
fields.BoxListFields.densepose_part_ids)
gt_dp_surface_coords_list = self.groundtruth_lists(
fields.BoxListFields.densepose_surface_coords)
gt_weights_list = self.groundtruth_lists(fields.BoxListFields.weights)
assigner = self._target_assigner_dict[DENSEPOSE_TASK]
batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
assigner.assign_part_and_coordinate_targets(
height=input_height,
width=input_width,
gt_dp_num_points_list=gt_dp_num_points_list,
gt_dp_part_ids_list=gt_dp_part_ids_list,
gt_dp_surface_coords_list=gt_dp_surface_coords_list,
gt_weights_list=gt_weights_list))
part_prediction_loss = 0
surface_coord_loss = 0
classification_loss_fn = self._densepose_params.classification_loss
localization_loss_fn = self._densepose_params.localization_loss
num_predictions = float(len(part_predictions))
num_valid_points = tf.math.count_nonzero(batch_weights)
num_valid_points = tf.cast(tf.math.maximum(num_valid_points, 1), tf.float32)
for part_pred, surface_coord_pred in zip(part_predictions,
surface_coord_predictions):
# Potentially upsample the feature maps, so that better quality (i.e.
# higher res) groundtruth can be applied.
if self._densepose_params.upsample_to_input_res:
part_pred = tf.keras.layers.UpSampling2D(
self._stride, interpolation=self._densepose_params.upsample_method)(
part_pred)
surface_coord_pred = tf.keras.layers.UpSampling2D(
self._stride, interpolation=self._densepose_params.upsample_method)(
surface_coord_pred)
# Compute the part prediction loss.
part_pred = cn_assigner.get_batch_predictions_from_indices(
part_pred, batch_indices[:, 0:3])
part_prediction_loss += classification_loss_fn(
part_pred[:, tf.newaxis, :],
batch_part_ids[:, tf.newaxis, :],
weights=batch_weights[:, tf.newaxis, tf.newaxis])
# Compute the surface coordinate loss.
batch_size, out_height, out_width, _ = _get_shape(
surface_coord_pred, 4)
surface_coord_pred = tf.reshape(
surface_coord_pred, [batch_size, out_height, out_width, -1, 2])
surface_coord_pred = cn_assigner.get_batch_predictions_from_indices(
surface_coord_pred, batch_indices)
surface_coord_loss += localization_loss_fn(
surface_coord_pred,
batch_surface_coords,
weights=batch_weights[:, tf.newaxis])
part_prediction_loss = tf.reduce_sum(part_prediction_loss) / (
num_predictions * num_valid_points)
surface_coord_loss = tf.reduce_sum(surface_coord_loss) / (
num_predictions * num_valid_points)
return part_prediction_loss, surface_coord_loss
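A toy check (assumed numbers) of the normalization at the end of this function: the summed per-point losses from every prediction head are divided by (number of heads x number of valid sampled points), with the point count clamped to at least 1 so images without DensePose labels do not divide by zero.
import tensorflow as tf
batch_weights = tf.constant([1., 1., 0., 1.])              # 3 valid points
per_head_loss_sums = [tf.constant(1.2), tf.constant(1.2)]  # one sum per head
num_predictions = float(len(per_head_loss_sums))
num_valid_points = tf.cast(
    tf.math.maximum(tf.math.count_nonzero(batch_weights), 1), tf.float32)
normalized = tf.add_n(per_head_loss_sums) / (num_predictions * num_valid_points)
print(float(normalized))  # 2.4 / (2 * 3) = 0.4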
def preprocess(self, inputs):
outputs = shape_utils.resize_images_and_return_shapes(
inputs, self._image_resizer_fn)
......@@ -1909,6 +2258,13 @@ class CenterNetMetaArch(model.DetectionModel):
'segmentation/heatmap' - [optional] A list of size num_feature_outputs
holding float tensors of size [batch_size, output_height,
output_width, num_classes] representing the mask logits.
'densepose/heatmap' - [optional] A list of size num_feature_outputs
holding float tensors of size [batch_size, output_height,
output_width, num_parts] representing the mask logits for each part.
'densepose/regression' - [optional] A list of size num_feature_outputs
holding float tensors of size [batch_size, output_height,
output_width, 2 * num_parts] representing the DensePose surface
coordinate predictions.
Note the $TASK_NAME is provided by the KeypointEstimation namedtuple
used to differentiate between different keypoint tasks.
"""
......@@ -1938,10 +2294,16 @@ class CenterNetMetaArch(model.DetectionModel):
scope: Optional scope name.
Returns:
A dictionary mapping the keys ['Loss/object_center', 'Loss/box/scale',
'Loss/box/offset', 'Loss/$TASK_NAME/keypoint/heatmap',
'Loss/$TASK_NAME/keypoint/offset',
'Loss/$TASK_NAME/keypoint/regression', 'Loss/segmentation/heatmap'] to
A dictionary mapping the keys [
'Loss/object_center',
'Loss/box/scale', (optional)
'Loss/box/offset', (optional)
'Loss/$TASK_NAME/keypoint/heatmap', (optional)
'Loss/$TASK_NAME/keypoint/offset', (optional)
'Loss/$TASK_NAME/keypoint/regression', (optional)
'Loss/segmentation/heatmap', (optional)
'Loss/densepose/heatmap', (optional)
'Loss/densepose/regression'] (optional)
scalar tensors corresponding to the losses for different tasks. Note the
$TASK_NAME is provided by the KeypointEstimation namedtuple used to
differentiate between different keypoint tasks.
......@@ -1999,6 +2361,16 @@ class CenterNetMetaArch(model.DetectionModel):
seg_losses[key] = seg_losses[key] * self._mask_params.task_loss_weight
losses.update(seg_losses)
if self._densepose_params is not None:
densepose_losses = self._compute_densepose_losses(
input_height=input_height,
input_width=input_width,
prediction_dict=prediction_dict)
for key in densepose_losses:
densepose_losses[key] = (
densepose_losses[key] * self._densepose_params.task_loss_weight)
losses.update(densepose_losses)
# Prepend the LOSS_KEY_PREFIX to the keys in the dictionary such that the
# losses will be grouped together in Tensorboard.
return dict([('%s/%s' % (LOSS_KEY_PREFIX, key), val)
......@@ -2033,9 +2405,14 @@ class CenterNetMetaArch(model.DetectionModel):
invalid keypoints have their coordinates and scores set to 0.0.
detection_keypoint_scores: (Optional) A float tensor of shape [batch,
max_detection, num_keypoints] with scores for each keypoint.
detection_masks: (Optional) An int tensor of shape [batch,
max_detections, mask_height, mask_width] with binarized masks for each
detection.
detection_masks: (Optional) A uint8 tensor of shape [batch,
max_detections, mask_height, mask_width] with masks for each
detection. Background is specified with 0, and foreground is specified
with positive integers (1 for standard instance segmentation mask, and
1-indexed parts for DensePose task).
detection_surface_coords: (Optional) A float32 tensor of shape [batch,
max_detection, mask_height, mask_width, 2] with DensePose surface
coordinates, in (v, u) format.
"""
object_center_prob = tf.nn.sigmoid(prediction_dict[OBJECT_CENTER][-1])
# Get x, y and channel indices corresponding to the top indices in the class
......@@ -2076,14 +2453,27 @@ class CenterNetMetaArch(model.DetectionModel):
if self._mask_params:
masks = tf.nn.sigmoid(prediction_dict[SEGMENTATION_HEATMAP][-1])
instance_masks = convert_strided_predictions_to_instance_masks(
boxes, classes, masks, self._stride, self._mask_params.mask_height,
self._mask_params.mask_width, true_image_shapes,
self._mask_params.score_threshold)
postprocess_dict.update({
fields.DetectionResultFields.detection_masks:
instance_masks
})
densepose_part_heatmap, densepose_surface_coords = None, None
densepose_class_index = 0
if self._densepose_params:
densepose_part_heatmap = prediction_dict[DENSEPOSE_HEATMAP][-1]
densepose_surface_coords = prediction_dict[DENSEPOSE_REGRESSION][-1]
densepose_class_index = self._densepose_params.class_id
instance_masks, surface_coords = (
convert_strided_predictions_to_instance_masks(
boxes, classes, masks, true_image_shapes,
densepose_part_heatmap, densepose_surface_coords,
stride=self._stride, mask_height=self._mask_params.mask_height,
mask_width=self._mask_params.mask_width,
score_threshold=self._mask_params.score_threshold,
densepose_class_index=densepose_class_index))
postprocess_dict[
fields.DetectionResultFields.detection_masks] = instance_masks
if self._densepose_params:
postprocess_dict[
fields.DetectionResultFields.detection_surface_coords] = (
surface_coords)
return postprocess_dict
def _postprocess_keypoints(self, prediction_dict, classes, y_indices,
......@@ -2330,17 +2720,61 @@ class CenterNetMetaArch(model.DetectionModel):
def regularization_losses(self):
return []
def restore_map(self, fine_tune_checkpoint_type='classification',
def restore_map(self,
fine_tune_checkpoint_type='detection',
load_all_detection_checkpoint_vars=False):
raise RuntimeError('CenterNetMetaArch not supported under TF1.x.')
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of Trackable objects to load from a foreign checkpoint.
Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
or Checkpoint). This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Note that this function is intended to be used to restore Keras-based
models when running Tensorflow 2, whereas restore_map (not implemented
in CenterNet) is intended to be used to restore Slim-based models when
running Tensorflow 1.x.
TODO(jonathanhuang): Make this function consistent with other
meta-architectures.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`, `fine_tune`. Default
'detection'.
'detection': used when loading the Hourglass model pre-trained on
another detection task.
'classification': used when loading the ResNet model pre-trained on an
image classification task. Note that only the image feature encoding
part is loaded, not the upsampling layers.
'fine_tune': used when loading the entire CenterNet feature extractor
pre-trained on other tasks. The checkpoints saved during CenterNet
model training can be directly loaded using this mode.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
"""
if fine_tune_checkpoint_type == 'classification':
return {'feature_extractor': self._feature_extractor.get_base_model()}
if fine_tune_checkpoint_type == 'detection':
elif fine_tune_checkpoint_type == 'detection':
return {'feature_extractor': self._feature_extractor.get_model()}
elif fine_tune_checkpoint_type == 'fine_tune':
feature_extractor_model = tf.train.Checkpoint(
_feature_extractor=self._feature_extractor)
return {'model': feature_extractor_model}
else:
raise ValueError('Unknown fine tune checkpoint type - {}'.format(
raise ValueError('Not supported fine tune checkpoint type - {}'.format(
fine_tune_checkpoint_type))
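A short usage sketch (the checkpoint path and `detection_model` are placeholders) of how the returned Trackable map is typically consumed: wrap it in a tf.train.Checkpoint and restore, tolerating variables the foreign checkpoint does not provide.
restore_map = detection_model.restore_from_objects(
    fine_tune_checkpoint_type='classification')
ckpt = tf.train.Checkpoint(**restore_map)
ckpt.restore('/path/to/foreign_checkpoint').expect_partial()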
def updates(self):
......
......@@ -266,7 +266,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
masks_np[0, :, :3, 1] = 1 # Class 1.
masks = tf.constant(masks_np)
true_image_shapes = tf.constant([[6, 8, 3]])
instance_masks = cnma.convert_strided_predictions_to_instance_masks(
instance_masks, _ = cnma.convert_strided_predictions_to_instance_masks(
boxes, classes, masks, stride=2, mask_height=2, mask_width=2,
true_image_shapes=true_image_shapes)
return instance_masks
......@@ -289,6 +289,104 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
])
np.testing.assert_array_equal(expected_instance_masks, instance_masks)
def test_convert_strided_predictions_raises_error_with_one_tensor(self):
def graph_fn():
boxes = tf.constant(
[
[[0.5, 0.5, 1.0, 1.0],
[0.0, 0.5, 0.5, 1.0],
[0.0, 0.0, 0.0, 0.0]],
], tf.float32)
classes = tf.constant(
[
[0, 1, 0],
], tf.int32)
masks_np = np.zeros((1, 4, 4, 2), dtype=np.float32)
masks_np[0, :, 2:, 0] = 1 # Class 0.
masks_np[0, :, :3, 1] = 1 # Class 1.
masks = tf.constant(masks_np)
true_image_shapes = tf.constant([[6, 8, 3]])
densepose_part_heatmap = tf.random.uniform(
[1, 4, 4, 24])
instance_masks, _ = cnma.convert_strided_predictions_to_instance_masks(
boxes, classes, masks, true_image_shapes,
densepose_part_heatmap=densepose_part_heatmap,
densepose_surface_coords=None)
return instance_masks
with self.assertRaises(ValueError):
self.execute_cpu(graph_fn, [])
def test_crop_and_threshold_masks(self):
boxes_np = np.array(
[[0., 0., 0.5, 0.5],
[0.25, 0.25, 1.0, 1.0]], dtype=np.float32)
classes_np = np.array([0, 2], dtype=np.int32)
masks_np = np.zeros((4, 4, _NUM_CLASSES), dtype=np.float32)
masks_np[0, 0, 0] = 0.8
masks_np[1, 1, 0] = 0.6
masks_np[3, 3, 2] = 0.7
part_heatmap_np = np.zeros((4, 4, _DENSEPOSE_NUM_PARTS), dtype=np.float32)
part_heatmap_np[0, 0, 4] = 1
part_heatmap_np[0, 0, 2] = 0.6 # Lower scoring.
part_heatmap_np[1, 1, 8] = 0.2
part_heatmap_np[3, 3, 4] = 0.5
surf_coords_np = np.zeros((4, 4, 2 * _DENSEPOSE_NUM_PARTS),
dtype=np.float32)
surf_coords_np[:, :, 8:10] = 0.2, 0.9
surf_coords_np[:, :, 16:18] = 0.3, 0.5
true_height, true_width = 10, 10
input_height, input_width = 10, 10
mask_height = 4
mask_width = 4
def graph_fn():
elems = [
tf.constant(boxes_np),
tf.constant(classes_np),
tf.constant(masks_np),
tf.constant(part_heatmap_np),
tf.constant(surf_coords_np),
tf.constant(true_height, dtype=tf.int32),
tf.constant(true_width, dtype=tf.int32)
]
part_masks, surface_coords = cnma.crop_and_threshold_masks(
elems, input_height, input_width, mask_height=mask_height,
mask_width=mask_width, densepose_class_index=0)
return part_masks, surface_coords
part_masks, surface_coords = self.execute_cpu(graph_fn, [])
expected_part_masks = np.zeros((2, 4, 4), dtype=np.uint8)
expected_part_masks[0, 0, 0] = 5  # Recall parts are 1-indexed in output.
expected_part_masks[0, 2, 2] = 9  # Recall parts are 1-indexed in output.
expected_part_masks[1, 3, 3] = 1 # Standard instance segmentation mask.
expected_surface_coords = np.zeros((2, 4, 4, 2), dtype=np.float32)
expected_surface_coords[0, 0, 0, :] = 0.2, 0.9
expected_surface_coords[0, 2, 2, :] = 0.3, 0.5
np.testing.assert_allclose(expected_part_masks, part_masks)
np.testing.assert_allclose(expected_surface_coords, surface_coords)
def test_gather_surface_coords_for_parts(self):
surface_coords_cropped_np = np.zeros((2, 5, 5, _DENSEPOSE_NUM_PARTS, 2),
dtype=np.float32)
surface_coords_cropped_np[0, 0, 0, 5] = 0.3, 0.4
surface_coords_cropped_np[0, 1, 0, 9] = 0.5, 0.6
highest_scoring_part_np = np.zeros((2, 5, 5), dtype=np.int32)
highest_scoring_part_np[0, 0, 0] = 5
highest_scoring_part_np[0, 1, 0] = 9
def graph_fn():
surface_coords_cropped = tf.constant(surface_coords_cropped_np,
tf.float32)
highest_scoring_part = tf.constant(highest_scoring_part_np, tf.int32)
surface_coords_gathered = cnma.gather_surface_coords_for_parts(
surface_coords_cropped, highest_scoring_part)
return surface_coords_gathered
surface_coords_gathered = self.execute_cpu(graph_fn, [])
np.testing.assert_allclose([0.3, 0.4], surface_coords_gathered[0, 0, 0])
np.testing.assert_allclose([0.5, 0.6], surface_coords_gathered[0, 1, 0])
def test_top_k_feature_map_locations(self):
feature_map_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
feature_map_np[0, 2, 0, 1] = 1.0
......@@ -535,6 +633,8 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
keypoint_heatmap_np[1, 0, 1, 1] = 0.9
keypoint_heatmap_np[1, 2, 0, 1] = 0.8
# Note that the keypoint offsets are now per keypoint (as opposed to
# keypoint agnostic, in the test test_keypoint_candidate_prediction).
keypoint_heatmap_offsets_np = np.zeros((2, 3, 3, 4), dtype=np.float32)
keypoint_heatmap_offsets_np[0, 0, 0] = [0.5, 0.25, 0.0, 0.0]
keypoint_heatmap_offsets_np[0, 2, 1] = [-0.25, 0.5, 0.0, 0.0]
......@@ -949,6 +1049,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
_NUM_CLASSES = 10
_KEYPOINT_INDICES = [0, 1, 2, 3]
_NUM_KEYPOINTS = len(_KEYPOINT_INDICES)
_DENSEPOSE_NUM_PARTS = 24
_TASK_NAME = 'human_pose'
......@@ -991,6 +1092,20 @@ def get_fake_mask_params():
mask_width=4)
def get_fake_densepose_params():
"""Returns the fake DensePose estimation parameter namedtuple."""
return cnma.DensePoseParams(
class_id=1,
classification_loss=losses.WeightedSoftmaxClassificationLoss(),
localization_loss=losses.L1LocalizationLoss(),
part_loss_weight=1.0,
coordinate_loss_weight=1.0,
num_parts=_DENSEPOSE_NUM_PARTS,
task_loss_weight=1.0,
upsample_to_input_res=True,
upsample_method='nearest')
def build_center_net_meta_arch(build_resnet=False):
"""Builds the CenterNet meta architecture."""
if build_resnet:
......@@ -1018,7 +1133,8 @@ def build_center_net_meta_arch(build_resnet=False):
object_center_params=get_fake_center_params(),
object_detection_params=get_fake_od_params(),
keypoint_params_dict={_TASK_NAME: get_fake_kp_params()},
mask_params=get_fake_mask_params())
mask_params=get_fake_mask_params(),
densepose_params=get_fake_densepose_params())
def _logit(p):
......@@ -1102,6 +1218,16 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
fake_feature_map)
self.assertEqual((4, 128, 128, _NUM_CLASSES), output.shape)
# "densepose parts" head:
output = model._prediction_head_dict[cnma.DENSEPOSE_HEATMAP][-1](
fake_feature_map)
self.assertEqual((4, 128, 128, _DENSEPOSE_NUM_PARTS), output.shape)
# "densepose surface coordinates" head:
output = model._prediction_head_dict[cnma.DENSEPOSE_REGRESSION][-1](
fake_feature_map)
self.assertEqual((4, 128, 128, 2 * _DENSEPOSE_NUM_PARTS), output.shape)
def test_initialize_target_assigners(self):
model = build_center_net_meta_arch()
assigner_dict = model._initialize_target_assigners(
......@@ -1125,6 +1251,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertIsInstance(assigner_dict[cnma.SEGMENTATION_TASK],
cn_assigner.CenterNetMaskTargetAssigner)
# DensePose estimation target assigner:
self.assertIsInstance(assigner_dict[cnma.DENSEPOSE_TASK],
cn_assigner.CenterNetDensePoseTargetAssigner)
def test_predict(self):
"""Test the predict function."""
......@@ -1145,6 +1275,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
(2, 32, 32, 2))
self.assertEqual(prediction_dict[cnma.SEGMENTATION_HEATMAP][0].shape,
(2, 32, 32, _NUM_CLASSES))
self.assertEqual(prediction_dict[cnma.DENSEPOSE_HEATMAP][0].shape,
(2, 32, 32, _DENSEPOSE_NUM_PARTS))
self.assertEqual(prediction_dict[cnma.DENSEPOSE_REGRESSION][0].shape,
(2, 32, 32, 2 * _DENSEPOSE_NUM_PARTS))
def test_loss(self):
"""Test the loss function."""
......@@ -1157,7 +1291,13 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
groundtruth_keypoints_list=groundtruth_dict[
fields.BoxListFields.keypoints],
groundtruth_masks_list=groundtruth_dict[
fields.BoxListFields.masks])
fields.BoxListFields.masks],
groundtruth_dp_num_points_list=groundtruth_dict[
fields.BoxListFields.densepose_num_points],
groundtruth_dp_part_ids_list=groundtruth_dict[
fields.BoxListFields.densepose_part_ids],
groundtruth_dp_surface_coords_list=groundtruth_dict[
fields.BoxListFields.densepose_surface_coords])
prediction_dict = get_fake_prediction_dict(
input_height=16, input_width=32, stride=4)
......@@ -1193,6 +1333,12 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertGreater(
0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX,
cnma.SEGMENTATION_HEATMAP)])
self.assertGreater(
0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX,
cnma.DENSEPOSE_HEATMAP)])
self.assertGreater(
0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX,
cnma.DENSEPOSE_REGRESSION)])
@parameterized.parameters(
{'target_class_id': 1},
......@@ -1230,6 +1376,14 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
segmentation_heatmap[:, 14:18, 14:18, target_class_id] = 1.0
segmentation_heatmap = _logit(segmentation_heatmap)
dp_part_ind = 4
dp_part_heatmap = np.zeros((1, 32, 32, _DENSEPOSE_NUM_PARTS),
dtype=np.float32)
dp_part_heatmap[0, 14:18, 14:18, dp_part_ind] = 1.0
dp_part_heatmap = _logit(dp_part_heatmap)
dp_surf_coords = np.random.randn(1, 32, 32, 2 * _DENSEPOSE_NUM_PARTS)
class_center = tf.constant(class_center)
height_width = tf.constant(height_width)
offset = tf.constant(offset)
......@@ -1237,6 +1391,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32)
keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32)
segmentation_heatmap = tf.constant(segmentation_heatmap, dtype=tf.float32)
dp_part_heatmap = tf.constant(dp_part_heatmap, dtype=tf.float32)
dp_surf_coords = tf.constant(dp_surf_coords, dtype=tf.float32)
prediction_dict = {
cnma.OBJECT_CENTER: [class_center],
......@@ -1249,6 +1405,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION):
[keypoint_regression],
cnma.SEGMENTATION_HEATMAP: [segmentation_heatmap],
cnma.DENSEPOSE_HEATMAP: [dp_part_heatmap],
cnma.DENSEPOSE_REGRESSION: [dp_surf_coords]
}
def graph_fn():
......@@ -1271,12 +1429,13 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertAllEqual([1, max_detection, 4, 4],
detections['detection_masks'].shape)
# There should be some section of the first mask (corresponding to the only
# detection) with non-zero mask values.
self.assertGreater(np.sum(detections['detection_masks'][0, 0, :, :] > 0), 0)
# Masks should be empty for everything but the first detection.
self.assertAllEqual(
detections['detection_masks'][0, 1:, :, :],
np.zeros_like(detections['detection_masks'][0, 1:, :, :]))
self.assertAllEqual(
detections['detection_surface_coords'][0, 1:, :, :],
np.zeros_like(detections['detection_surface_coords'][0, 1:, :, :]))
if target_class_id == 1:
expected_kpts_for_obj_0 = np.array(
......@@ -1287,6 +1446,12 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
expected_kpts_for_obj_0, rtol=1e-6)
np.testing.assert_allclose(detections['detection_keypoint_scores'][0][0],
expected_kpt_scores_for_obj_0, rtol=1e-6)
# First detection has DensePose parts.
self.assertSameElements(
np.unique(detections['detection_masks'][0, 0, :, :]),
set([0, dp_part_ind + 1]))
self.assertGreater(np.sum(np.abs(detections['detection_surface_coords'])),
0.0)
else:
# All keypoint outputs should be zeros.
np.testing.assert_allclose(
......@@ -1297,6 +1462,14 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
detections['detection_keypoint_scores'][0][0],
np.zeros([num_keypoints], np.float),
rtol=1e-6)
# Binary segmentation mask.
self.assertSameElements(
np.unique(detections['detection_masks'][0, 0, :, :]),
set([0, 1]))
# No DensePose surface coordinates.
np.testing.assert_allclose(
detections['detection_surface_coords'][0, 0, :, :],
np.zeros_like(detections['detection_surface_coords'][0, 0, :, :]))
def test_get_instance_indices(self):
classes = tf.constant([[0, 1, 2, 0], [2, 1, 2, 2]], dtype=tf.int32)
......@@ -1353,6 +1526,17 @@ def get_fake_prediction_dict(input_height, input_width, stride):
mask_heatmap[0, 2, 4, 1] = 1.0
mask_heatmap = _logit(mask_heatmap)
densepose_heatmap = np.zeros((2, output_height, output_width,
_DENSEPOSE_NUM_PARTS), dtype=np.float32)
densepose_heatmap[0, 2, 4, 5] = 1.0
densepose_heatmap = _logit(densepose_heatmap)
densepose_regression = np.zeros((2, output_height, output_width,
2 * _DENSEPOSE_NUM_PARTS), dtype=np.float32)
# The surface coordinate indices for part index 5 are:
# (5 * 2, 5 * 2 + 1), or (10, 11).
densepose_regression[0, 2, 4, 10:12] = 0.4, 0.7
prediction_dict = {
'preprocessed_inputs':
tf.zeros((2, input_height, input_width, 3)),
......@@ -1383,6 +1567,14 @@ def get_fake_prediction_dict(input_height, input_width, stride):
cnma.SEGMENTATION_HEATMAP: [
tf.constant(mask_heatmap),
tf.constant(mask_heatmap)
],
cnma.DENSEPOSE_HEATMAP: [
tf.constant(densepose_heatmap),
tf.constant(densepose_heatmap),
],
cnma.DENSEPOSE_REGRESSION: [
tf.constant(densepose_regression),
tf.constant(densepose_regression),
]
}
return prediction_dict
......@@ -1427,12 +1619,30 @@ def get_fake_groundtruth_dict(input_height, input_width, stride):
tf.constant(mask),
tf.zeros_like(mask),
]
densepose_num_points = [
tf.constant([1], dtype=tf.int32),
tf.constant([0], dtype=tf.int32),
]
densepose_part_ids = [
tf.constant([[5, 0, 0]], dtype=tf.int32),
tf.constant([[0, 0, 0]], dtype=tf.int32),
]
densepose_surface_coords_np = np.zeros((1, 3, 4), dtype=np.float32)
densepose_surface_coords_np[0, 0, :] = 0.55, 0.55, 0.4, 0.7
densepose_surface_coords = [
tf.constant(densepose_surface_coords_np),
tf.zeros_like(densepose_surface_coords_np)
]
groundtruth_dict = {
fields.BoxListFields.boxes: boxes,
fields.BoxListFields.weights: weights,
fields.BoxListFields.classes: classes,
fields.BoxListFields.keypoints: keypoints,
fields.BoxListFields.masks: masks,
fields.BoxListFields.densepose_num_points: densepose_num_points,
fields.BoxListFields.densepose_part_ids: densepose_part_ids,
fields.BoxListFields.densepose_surface_coords:
densepose_surface_coords,
fields.InputDataFields.groundtruth_labeled_classes: labeled_classes,
}
return groundtruth_dict
......@@ -1574,8 +1784,9 @@ class CenterNetMetaArchRestoreTest(test_case.TestCase):
"""Test restore map for a resnet backbone."""
model = build_center_net_meta_arch(build_resnet=True)
restore_map = model.restore_map('classification')
self.assertIsInstance(restore_map['feature_extractor'], tf.keras.Model)
restore_from_objects_map = model.restore_from_objects('classification')
self.assertIsInstance(restore_from_objects_map['feature_extractor'],
tf.keras.Model)
class DummyFeatureExtractor(cnma.CenterNetFeatureExtractor):
......@@ -1601,9 +1812,6 @@ class DummyFeatureExtractor(cnma.CenterNetFeatureExtractor):
def postprocess(self):
pass
def restore_map(self):
pass
def call(self, inputs):
batch_size, input_height, input_width, _ = inputs.shape
fake_output = tf.ones([
......
......@@ -324,7 +324,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
A float32 Tensor with shape [K, new_height, new_width, depth].
"""
box_features = self._crop_and_resize_fn(
features_to_crop, proposal_boxes_normalized,
[features_to_crop], proposal_boxes_normalized, None,
[self._initial_crop_size, self._initial_crop_size])
attention_features = self._context_feature_extract_fn(
......
......@@ -20,8 +20,8 @@ from __future__ import print_function
import functools
import unittest
from unittest import mock # pylint: disable=g-importing-member
from absl.testing import parameterized
import mock
import tensorflow.compat.v1 as tf
import tf_slim as slim
......@@ -41,7 +41,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
from object_detection.protos import post_processing_pb2
from object_detection.utils import ops
from object_detection.utils import spatial_transform_ops as spatial_ops
from object_detection.utils import test_case
from object_detection.utils import test_utils
from object_detection.utils import tf_version
......@@ -363,8 +363,9 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
max_negatives_per_positive=None)
crop_and_resize_fn = (
ops.matmul_crop_and_resize
if use_matmul_crop_and_resize else ops.native_crop_and_resize)
spatial_ops.multilevel_matmul_crop_and_resize
if use_matmul_crop_and_resize
else spatial_ops.multilevel_native_crop_and_resize)
common_kwargs = {
'is_training':
is_training,
......
......@@ -261,31 +261,6 @@ class FasterRCNNKerasFeatureExtractor(object):
"""Get model that extracts second stage box classifier features."""
pass
def restore_from_classification_checkpoint_fn(
self,
first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Args:
first_stage_feature_extractor_scope: A scope name for the first stage
feature extractor.
second_stage_feature_extractor_scope: A scope name for the second stage
feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in variables_helper.get_global_variables_safely():
for scope_name in [first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope]:
if variable.op.name.startswith(scope_name):
var_name = variable.op.name.replace(scope_name + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
class FasterRCNNMetaArch(model.DetectionModel):
"""Faster R-CNN Meta-architecture definition."""
......@@ -1973,9 +1948,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
Returns:
A float32 tensor with shape [K, new_height, new_width, depth].
"""
features_to_crop = [features_to_crop]
num_levels = len(features_to_crop)
box_levels = None
if num_levels != 1:
# If there are multiple levels to select, get the box levels
box_levels = ops.fpn_feature_levels(num_levels, num_levels - 1,
1.0/224, proposal_boxes_normalized)
cropped_regions = self._flatten_first_two_dimensions(
self._crop_and_resize_fn(
features_to_crop, proposal_boxes_normalized,
features_to_crop, proposal_boxes_normalized, box_levels,
[self._initial_crop_size, self._initial_crop_size]))
return self._maxpool_layer(cropped_regions)
......@@ -2542,8 +2524,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
image_shape[1], image_shape[2], check_range=False).get()
flat_cropped_gt_mask = self._crop_and_resize_fn(
tf.expand_dims(flat_gt_masks, -1),
tf.expand_dims(flat_normalized_proposals, axis=1),
[tf.expand_dims(flat_gt_masks, -1)],
tf.expand_dims(flat_normalized_proposals, axis=1), None,
[mask_height, mask_width])
# Without stopping gradients into cropped groundtruth masks the
# performance with 100-padded groundtruth masks when batch size > 1 is
......@@ -2572,7 +2554,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
if second_stage_mask_loss is not None:
mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
second_stage_mask_loss, name='mask_loss')
loss_dict[mask_loss.op.name] = mask_loss
loss_dict['Loss/BoxClassifierLoss/mask_loss'] = mask_loss
return loss_dict
def _get_mask_proposal_boxes_and_classes(
......@@ -2801,6 +2783,46 @@ class FasterRCNNMetaArch(model.DetectionModel):
variables_to_restore, include_patterns=include_patterns)
return {var.op.name: var for var in feature_extractor_variables}
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of Trackable objects to load from a foreign checkpoint.
Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
or Checkpoint). This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Note that this function is intended to be used to restore Keras-based
models when running Tensorflow 2, whereas restore_map (above) is intended
to be used to restore Slim-based models when running Tensorflow 1.x.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
"""
if fine_tune_checkpoint_type == 'classification':
return {
'feature_extractor':
self._feature_extractor.classification_backbone
}
elif fine_tune_checkpoint_type == 'detection':
fake_model = tf.train.Checkpoint(
_feature_extractor_for_box_classifier_features=
self._feature_extractor_for_box_classifier_features,
_feature_extractor_for_proposal_features=
self._feature_extractor_for_proposal_features)
return {'model': fake_model}
else:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
def updates(self):
"""Returns a list of update operators for this model.
......
......@@ -34,7 +34,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
from object_detection.protos import post_processing_pb2
from object_detection.utils import ops
from object_detection.utils import spatial_transform_ops as spatial_ops
from object_detection.utils import test_case
from object_detection.utils import test_utils
from object_detection.utils import tf_version
......@@ -377,8 +377,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
max_negatives_per_positive=None)
crop_and_resize_fn = (
ops.matmul_crop_and_resize
if use_matmul_crop_and_resize else ops.native_crop_and_resize)
spatial_ops.multilevel_matmul_crop_and_resize
if use_matmul_crop_and_resize
else spatial_ops.multilevel_native_crop_and_resize)
common_kwargs = {
'is_training':
is_training,
......
......@@ -250,35 +250,6 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
def call(self, inputs, **kwargs):
return self._extract_features(inputs)
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
if tf.executing_eagerly():
for variable in self.variables:
# variable.name includes ":0" at the end, but the names in the
# checkpoint do not have the suffix ":0". So, we strip it here.
var_name = variable.name[:-2]
if var_name.startswith(feature_extractor_scope + '/'):
var_name = var_name.replace(feature_extractor_scope + '/', '')
variables_to_restore[var_name] = variable
else:
# b/137854499: use global_variables.
for variable in variables_helper.get_global_variables_safely():
var_name = variable.op.name
if var_name.startswith(feature_extractor_scope + '/'):
var_name = var_name.replace(feature_extractor_scope + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
class SSDMetaArch(model.DetectionModel):
"""SSD Meta-architecture definition."""
......@@ -508,12 +479,9 @@ class SSDMetaArch(model.DetectionModel):
ValueError: if inputs tensor does not have type tf.float32
"""
with tf.name_scope('Preprocessor'):
(resized_inputs,
true_image_shapes) = shape_utils.resize_images_and_return_shapes(
inputs, self._image_resizer_fn)
return (self._feature_extractor.preprocess(resized_inputs),
true_image_shapes)
normalized_inputs = self._feature_extractor.preprocess(inputs)
return shape_utils.resize_images_and_return_shapes(
normalized_inputs, self._image_resizer_fn)
def _compute_clip_window(self, preprocessed_images, true_image_shapes):
"""Computes clip window to use during post_processing.
......@@ -1295,8 +1263,8 @@ class SSDMetaArch(model.DetectionModel):
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
load_all_detection_checkpoint_vars: whether to load all variables (when
`fine_tune_checkpoint_type='detection'`). If False, only variables
within the appropriate scopes are included. Default False.
`fine_tune_checkpoint_type` is `detection`). If False, only variables
within the feature extractor scope are included. Default False.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
......@@ -1311,36 +1279,56 @@ class SSDMetaArch(model.DetectionModel):
elif fine_tune_checkpoint_type == 'detection':
variables_to_restore = {}
if tf.executing_eagerly():
for variable in variables_helper.get_global_variables_safely():
var_name = variable.op.name
if load_all_detection_checkpoint_vars:
# Grab all detection vars by name
for variable in self.variables:
# variable.name includes ":0" at the end, but the names in the
# checkpoint do not have the suffix ":0". So, we strip it here.
var_name = variable.name[:-2]
variables_to_restore[var_name] = variable
variables_to_restore[var_name] = variable
else:
# Grab just the feature extractor vars by name
for variable in self._feature_extractor.variables:
# variable.name includes ":0" at the end, but the names in the
# checkpoint do not have the suffix ":0". So, we strip it here.
var_name = variable.name[:-2]
variables_to_restore[var_name] = variable
else:
for variable in variables_helper.get_global_variables_safely():
var_name = variable.op.name
if load_all_detection_checkpoint_vars:
if var_name.startswith(self._extract_features_scope):
variables_to_restore[var_name] = variable
else:
if var_name.startswith(self._extract_features_scope):
variables_to_restore[var_name] = variable
return variables_to_restore
else:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of Trackable objects to load from a foreign checkpoint.
Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
or Checkpoint). This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Note that this function is intended to be used to restore Keras-based
models when running Tensorflow 2, whereas restore_map (above) is intended
to be used to restore Slim-based models when running Tensorflow 1.x.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
"""
if fine_tune_checkpoint_type == 'classification':
return {
'feature_extractor':
self._feature_extractor.classification_backbone
}
elif fine_tune_checkpoint_type == 'detection':
fake_model = tf.train.Checkpoint(
_feature_extractor=self._feature_extractor)
return {'model': fake_model}
else:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
def updates(self):
"""Returns a list of update operators for this model.
......
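A rough usage sketch of the new `restore_from_objects` API (the variable names below are placeholders, not part of this change): a fine-tuning script wraps the returned Trackable map in a `tf.train.Checkpoint` and restores it, which is the same pattern `load_fine_tune_checkpoint` in model_lib_v2.py follows later in this diff.

# Hedged sketch; `detection_model` and `checkpoint_path` are assumed to exist.
restore_map = detection_model.restore_from_objects(
    fine_tune_checkpoint_type='classification')
ckpt = tf.train.Checkpoint(**restore_map)
ckpt.restore(checkpoint_path).assert_existing_objects_matched()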
......@@ -432,14 +432,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
return eval_metric_ops
def _check_mask_type_and_value(array_name, masks):
"""Checks whether mask dtype is uint8 and the values are either 0 or 1."""
if masks.dtype != np.uint8:
raise ValueError('{} must be of type np.uint8. Found {}.'.format(
array_name, masks.dtype))
if np.any(np.logical_and(masks != 0, masks != 1)):
raise ValueError('{} elements can only be either 0 or 1.'.format(
array_name))
def convert_masks_to_binary(masks):
"""Converts masks to 0 or 1 and uint8 type."""
return (masks > 0).astype(np.uint8)
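# Illustrative example (not part of this change): any non-zero value counts as
# foreground, so a mask filled with 5s binarizes to 1s, matching the updated
# CocoMaskEvaluationTest below.
#   convert_masks_to_binary(np.array([[0, 5], [1, 0]], dtype=np.uint8))
#   => array([[0, 1],
#             [1, 0]], dtype=uint8)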
class CocoKeypointEvaluator(CocoDetectionEvaluator):
......@@ -952,9 +947,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
groundtruth_instance_masks = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_instance_masks]
_check_mask_type_and_value(standard_fields.InputDataFields.
groundtruth_instance_masks,
groundtruth_instance_masks)
groundtruth_instance_masks = convert_masks_to_binary(
groundtruth_instance_masks)
self._groundtruth_list.extend(
coco_tools.
ExportSingleImageGroundtruthToCoco(
......@@ -1013,9 +1007,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
'are incompatible: {} vs {}'.format(
groundtruth_masks_shape,
detection_masks.shape))
_check_mask_type_and_value(standard_fields.DetectionResultFields.
detection_masks,
detection_masks)
detection_masks = convert_masks_to_binary(detection_masks)
self._detection_masks_list.extend(
coco_tools.ExportSingleImageDetectionMasksToCoco(
image_id=image_id,
......
......@@ -1424,14 +1424,16 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
image_id='image3',
detections_dict={
standard_fields.DetectionResultFields.detection_boxes:
np.array([[25., 25., 50., 50.]]),
np.array([[25., 25., 50., 50.]]),
standard_fields.DetectionResultFields.detection_scores:
np.array([.8]),
np.array([.8]),
standard_fields.DetectionResultFields.detection_classes:
np.array([1]),
np.array([1]),
standard_fields.DetectionResultFields.detection_masks:
np.pad(np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (10, 10), (10, 10)), mode='constant')
# The value of 5 is equivalent to 1, since masks will be
# thresholded and binarized before evaluation.
np.pad(5 * np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (10, 10), (10, 10)), mode='constant')
})
metrics = coco_evaluator.evaluate()
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
......
......@@ -136,15 +136,15 @@ def build_groundtruth_dictionary(data, class_label_map):
dictionary = {
standard_fields.InputDataFields.groundtruth_boxes:
data_location[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(),
data_location[['YMin', 'XMin', 'YMax', 'XMax']].to_numpy(),
standard_fields.InputDataFields.groundtruth_classes:
data_location['LabelName'].map(lambda x: class_label_map[x]
).as_matrix(),
).to_numpy(),
standard_fields.InputDataFields.groundtruth_group_of:
data_location['IsGroupOf'].as_matrix().astype(int),
data_location['IsGroupOf'].to_numpy().astype(int),
standard_fields.InputDataFields.groundtruth_image_classes:
data_labels['LabelName'].map(lambda x: class_label_map[x]
).as_matrix(),
).to_numpy(),
}
if 'Mask' in data_location:
......@@ -179,9 +179,9 @@ def build_predictions_dictionary(data, class_label_map):
"""
dictionary = {
standard_fields.DetectionResultFields.detection_classes:
data['LabelName'].map(lambda x: class_label_map[x]).as_matrix(),
data['LabelName'].map(lambda x: class_label_map[x]).to_numpy(),
standard_fields.DetectionResultFields.detection_scores:
data['Score'].as_matrix()
data['Score'].to_numpy()
}
if 'Mask' in data:
......@@ -192,6 +192,6 @@ def build_predictions_dictionary(data, class_label_map):
else:
dictionary[standard_fields.DetectionResultFields.detection_boxes] = data[[
'YMin', 'XMin', 'YMax', 'XMax'
]].as_matrix()
]].to_numpy()
return dictionary
......@@ -53,16 +53,16 @@ def build_groundtruth_vrd_dictionary(data, class_label_map,
boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
'XMax1']].as_matrix()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
'XMax1']].to_numpy()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].to_numpy()
labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
labels['subject'] = data_boxes['LabelName1'].map(
lambda x: class_label_map[x]).as_matrix()
lambda x: class_label_map[x]).to_numpy()
labels['object'] = data_boxes['LabelName2'].map(
lambda x: class_label_map[x]).as_matrix()
lambda x: class_label_map[x]).to_numpy()
labels['relation'] = data_boxes['RelationshipLabel'].map(
lambda x: relationship_label_map[x]).as_matrix()
lambda x: relationship_label_map[x]).to_numpy()
return {
standard_fields.InputDataFields.groundtruth_boxes:
......@@ -71,7 +71,7 @@ def build_groundtruth_vrd_dictionary(data, class_label_map,
labels,
standard_fields.InputDataFields.groundtruth_image_classes:
data_labels['LabelName'].map(lambda x: class_label_map[x])
.as_matrix(),
.to_numpy(),
}
......@@ -104,16 +104,16 @@ def build_predictions_vrd_dictionary(data, class_label_map,
boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
'XMax1']].as_matrix()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
'XMax1']].to_numpy()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].to_numpy()
labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
labels['subject'] = data_boxes['LabelName1'].map(
lambda x: class_label_map[x]).as_matrix()
lambda x: class_label_map[x]).to_numpy()
labels['object'] = data_boxes['LabelName2'].map(
lambda x: class_label_map[x]).as_matrix()
lambda x: class_label_map[x]).to_numpy()
labels['relation'] = data_boxes['RelationshipLabel'].map(
lambda x: relationship_label_map[x]).as_matrix()
lambda x: relationship_label_map[x]).to_numpy()
return {
standard_fields.DetectionResultFields.detection_boxes:
......@@ -121,5 +121,5 @@ def build_predictions_vrd_dictionary(data, class_label_map,
standard_fields.DetectionResultFields.detection_classes:
labels,
standard_fields.DetectionResultFields.detection_scores:
data_boxes['Score'].as_matrix()
data_boxes['Score'].to_numpy()
}
......@@ -43,7 +43,6 @@ from object_detection.utils import visualization_utils as vis_utils
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import learn as contrib_learn
from tensorflow.contrib import tpu as contrib_tpu
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
......@@ -94,6 +93,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
of groundtruth boxes per image.
'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
tensor of keypoints (if provided in groundtruth).
'groundtruth_dp_num_points_list': [batch_size, num_boxes] int32 tensor
with the number of DensePose points for each instance (if provided in
groundtruth).
'groundtruth_dp_part_ids_list': [batch_size, num_boxes,
max_sampled_points] int32 tensor with the part ids for each DensePose
sampled point (if provided in groundtruth).
'groundtruth_dp_surface_coords_list': [batch_size, num_boxes,
max_sampled_points, 4] float32 tensor with the DensePose surface coordinates for
each sampled point (if provided in groundtruth).
'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating
group_of annotations (if provided in groundtruth).
'groundtruth_labeled_classes': [batch_size, num_classes] int64
......@@ -164,6 +172,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
labeled_classes)
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_num_points):
groundtruth[input_data_fields.groundtruth_dp_num_points] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_num_points))
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_part_ids):
groundtruth[input_data_fields.groundtruth_dp_part_ids] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_part_ids))
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_surface_coords):
groundtruth[input_data_fields.groundtruth_dp_surface_coords] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_surface_coords))
groundtruth[input_data_fields.num_groundtruth_boxes] = (
tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
return groundtruth
......@@ -219,6 +242,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_keypoint_visibilities,
fields.InputDataFields.groundtruth_dp_num_points,
fields.InputDataFields.groundtruth_dp_part_ids,
fields.InputDataFields.groundtruth_dp_surface_coords,
fields.InputDataFields.groundtruth_group_of,
fields.InputDataFields.groundtruth_difficult,
fields.InputDataFields.groundtruth_is_crowd,
......@@ -269,6 +295,18 @@ def provide_groundtruth(model, labels):
if fields.InputDataFields.groundtruth_keypoint_visibilities in labels:
gt_keypoint_visibilities_list = labels[
fields.InputDataFields.groundtruth_keypoint_visibilities]
gt_dp_num_points_list = None
if fields.InputDataFields.groundtruth_dp_num_points in labels:
gt_dp_num_points_list = labels[
fields.InputDataFields.groundtruth_dp_num_points]
gt_dp_part_ids_list = None
if fields.InputDataFields.groundtruth_dp_part_ids in labels:
gt_dp_part_ids_list = labels[
fields.InputDataFields.groundtruth_dp_part_ids]
gt_dp_surface_coords_list = None
if fields.InputDataFields.groundtruth_dp_surface_coords in labels:
gt_dp_surface_coords_list = labels[
fields.InputDataFields.groundtruth_dp_surface_coords]
gt_weights_list = None
if fields.InputDataFields.groundtruth_weights in labels:
gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
......@@ -297,13 +335,16 @@ def provide_groundtruth(model, labels):
groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list,
groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
groundtruth_dp_num_points_list=gt_dp_num_points_list,
groundtruth_dp_part_ids_list=gt_dp_part_ids_list,
groundtruth_dp_surface_coords_list=gt_dp_surface_coords_list,
groundtruth_weights_list=gt_weights_list,
groundtruth_is_crowd_list=gt_is_crowd_list,
groundtruth_group_of_list=gt_group_of_list,
groundtruth_area_list=gt_area_list)
def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
postprocess_on_cpu=False):
"""Creates a model function for `Estimator`.
......@@ -377,7 +418,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
side_inputs = detection_model.get_side_inputs(features)
if use_tpu and train_config.use_bfloat16:
with contrib_tpu.bfloat16_scope():
with tf.tpu.bfloat16_scope():
prediction_dict = detection_model.predict(
preprocessed_images,
features[fields.InputDataFields.true_image_shape], **side_inputs)
......@@ -392,7 +433,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
if use_tpu and postprocess_on_cpu:
detections = contrib_tpu.outside_compilation(
detections = tf.tpu.outside_compilation(
postprocess_wrapper,
(prediction_dict,
features[fields.InputDataFields.true_image_shape]))
......@@ -468,7 +509,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
if mode == tf.estimator.ModeKeys.TRAIN:
if use_tpu:
training_optimizer = contrib_tpu.CrossShardOptimizer(training_optimizer)
training_optimizer = tf.tpu.CrossShardOptimizer(training_optimizer)
# Optionally freeze some layers by setting their gradients to be zero.
trainable_variables = None
......@@ -588,7 +629,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
# EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
return contrib_tpu.TPUEstimatorSpec(
return tf.estimator.tpu.TPUEstimatorSpec(
mode=mode,
scaffold_fn=scaffold_fn,
predictions=detections,
......@@ -619,8 +660,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
def create_estimator_and_inputs(run_config,
hparams,
pipeline_config_path,
hparams=None,
pipeline_config_path=None,
config_override=None,
train_steps=None,
sample_1_of_n_eval_examples=1,
......@@ -639,7 +680,7 @@ def create_estimator_and_inputs(run_config,
Args:
run_config: A `RunConfig`.
hparams: A `HParams`.
hparams: (optional) A `HParams`.
pipeline_config_path: A path to a pipeline config file.
config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
override the config from `pipeline_config_path`.
......@@ -762,14 +803,14 @@ def create_estimator_and_inputs(run_config,
model_config=model_config, predict_input_config=eval_input_configs[0])
# Read export_to_tpu from hparams if not passed.
if export_to_tpu is None:
if export_to_tpu is None and hparams is not None:
export_to_tpu = hparams.get('export_to_tpu', False)
tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
use_tpu, export_to_tpu)
model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu,
postprocess_on_cpu)
if use_tpu_estimator:
estimator = contrib_tpu.TPUEstimator(
estimator = tf.estimator.tpu.TPUEstimator(
model_fn=model_fn,
train_batch_size=train_config.batch_size,
# For each core, only batch size 1 is supported for eval.
......
......@@ -123,6 +123,9 @@ class SimpleModel(model.DetectionModel):
return []
def restore_map(self, *args, **kwargs):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
return {'model': self}
def preprocess(self, _):
......@@ -174,7 +177,7 @@ class ModelCheckpointTest(tf.test.TestCase):
class IncompatibleModel(SimpleModel):
def restore_map(self, *args, **kwargs):
def restore_from_objects(self, *args, **kwargs):
return {'weight': self.weight}
......@@ -207,7 +210,6 @@ class CheckpointV2Test(tf.test.TestCase):
model_lib_v2.load_fine_tune_checkpoint(
self._model, self._ckpt_path, checkpoint_type='',
checkpoint_version=train_pb2.CheckpointVersion.V2,
load_all_detection_checkpoint_vars=True,
input_dataset=self._train_input_fn(),
unpad_groundtruth_tensors=True)
np.testing.assert_allclose(self._model.weight.numpy(), 42)
......@@ -220,7 +222,6 @@ class CheckpointV2Test(tf.test.TestCase):
model_lib_v2.load_fine_tune_checkpoint(
IncompatibleModel(), self._ckpt_path, checkpoint_type='',
checkpoint_version=train_pb2.CheckpointVersion.V2,
load_all_detection_checkpoint_vars=True,
input_dataset=self._train_input_fn(),
unpad_groundtruth_tensors=True)
......
......@@ -34,7 +34,6 @@ from object_detection.protos import train_pb2
from object_detection.utils import config_util
from object_detection.utils import label_map_util
from object_detection.utils import ops
from object_detection.utils import variables_helper
from object_detection.utils import visualization_utils as vutils
# pylint: disable=g-import-not-at-top
......@@ -47,13 +46,6 @@ except ImportError:
MODEL_BUILD_UTIL_MAP = model_lib.MODEL_BUILD_UTIL_MAP
### NOTE: This file is a wip.
### TODO(kaftan): Explore adding unit tests for individual methods
### TODO(kaftan): Add unit test that checks training on a single image w/
#### groundtruth, and verify that loss goes to zero.
#### Possibly have version that takes it as the whole train & eval dataset,
#### & verify the loss output from the eval_loop method.
### TODO(kaftan): Make sure the unit tests run in TAP presubmits or Kokoro
RESTORE_MAP_ERROR_TEMPLATE = (
'Since we are restoring a v2 style checkpoint'
......@@ -101,6 +93,12 @@ def _compute_losses_and_predictions_dicts(
instance masks for objects.
labels[fields.InputDataFields.groundtruth_keypoints] is a
float32 tensor containing keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is an int32
tensor with the number of sampled DensePose points per object.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is an int32
tensor with the DensePose part ids (0-indexed) per object.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
float32 tensor with the DensePose surface coordinates.
labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor
containing group_of annotations.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
......@@ -203,6 +201,17 @@ def eager_train_step(detection_model,
labels[fields.InputDataFields.groundtruth_keypoints] is a
[batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is a
[batch_size, num_boxes] int32 tensor with the number of DensePose
sampled points per instance.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
[batch_size, num_boxes, max_sampled_points] int32 tensor with the
part ids (0-indexed) for each instance.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
[batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
surface coordinates for each point. Each surface coordinate is of the
form (y, x, v, u) where (y, x) are normalized image locations and
(v, u) are part-relative normalized surface coordinates.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
k-hot tensor of classes.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
......@@ -277,14 +286,21 @@ def validate_tf_v2_checkpoint_restore_map(checkpoint_restore_map):
"""
for key, value in checkpoint_restore_map.items():
if not (isinstance(key, str) and isinstance(value, tf.Module)):
if not (isinstance(key, str) and
(isinstance(value, tf.Module)
or isinstance(value, tf.train.Checkpoint))):
raise TypeError(RESTORE_MAP_ERROR_TEMPLATE.format(
key.__class__.__name__, value.__class__.__name__))
def is_object_based_checkpoint(checkpoint_path):
"""Returns true if `checkpoint_path` points to an object-based checkpoint."""
var_names = [var[0] for var in tf.train.list_variables(checkpoint_path)]
return '_CHECKPOINTABLE_OBJECT_GRAPH' in var_names
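# Illustrative guard (path is a placeholder, not part of this change); this is
# the same check load_fine_tune_checkpoint performs below before restoring:
#   if not is_object_based_checkpoint('/tmp/model_dir/ckpt-0'):
#     raise IOError('Checkpoint is expected to be an object-based checkpoint.')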
def load_fine_tune_checkpoint(
model, checkpoint_path, checkpoint_type, checkpoint_version,
load_all_detection_checkpoint_vars, input_dataset,
model, checkpoint_path, checkpoint_type, checkpoint_version, input_dataset,
unpad_groundtruth_tensors):
"""Load a fine tuning classification or detection checkpoint.
......@@ -292,8 +308,7 @@ def load_fine_tune_checkpoint(
the model by computing a dummy loss. (Models might not have built their
variables before their first execution)
It then loads a variable-name based classification or detection checkpoint
that comes from converted TF 1.x slim model checkpoints.
It then loads an object-based classification or detection checkpoint.
This method updates the model in-place and does not return a value.
......@@ -306,14 +321,22 @@ def load_fine_tune_checkpoint(
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`.
checkpoint_version: train_pb2.CheckpointVersion.V1 or V2 enum indicating
whether to load checkpoints in V1 style or V2 style.
load_all_detection_checkpoint_vars: whether to load all variables (when
`fine_tune_checkpoint_type` is `detection`). If False, only variables
within the feature extractor scopes are included. Default False.
whether to load checkpoints in V1 style or V2 style. In this binary
we only support V2 style (object-based) checkpoints.
input_dataset: The tf.data Dataset the model is being trained on. Needed
to get the shapes for the dummy loss computation.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
Raises:
IOError: if `checkpoint_path` does not point at a valid object-based
checkpoint
ValueError: if `checkpoint_version` is not train_pb2.CheckpointVersion.V2
"""
if not is_object_based_checkpoint(checkpoint_path):
raise IOError('Checkpoint is expected to be an object-based checkpoint.')
if checkpoint_version == train_pb2.CheckpointVersion.V1:
raise ValueError('Checkpoint version should be V2')
features, labels = iter(input_dataset).next()
@tf.function
......@@ -330,32 +353,24 @@ def load_fine_tune_checkpoint(
labels)
strategy = tf.compat.v2.distribute.get_strategy()
strategy.experimental_run_v2(
_dummy_computation_fn, args=(
features,
labels,
))
if hasattr(tf.distribute.Strategy, 'run'):
strategy.run(
_dummy_computation_fn, args=(
features,
labels,
))
else:
strategy.experimental_run_v2(
_dummy_computation_fn, args=(
features,
labels,
))
if checkpoint_version == train_pb2.CheckpointVersion.V1:
var_map = model.restore_map(
fine_tune_checkpoint_type=checkpoint_type,
load_all_detection_checkpoint_vars=(
load_all_detection_checkpoint_vars))
available_var_map = variables_helper.get_variables_available_in_checkpoint(
var_map,
checkpoint_path,
include_global_step=False)
tf.train.init_from_checkpoint(checkpoint_path,
available_var_map)
elif checkpoint_version == train_pb2.CheckpointVersion.V2:
restore_map = model.restore_map(
fine_tune_checkpoint_type=checkpoint_type,
load_all_detection_checkpoint_vars=(
load_all_detection_checkpoint_vars))
validate_tf_v2_checkpoint_restore_map(restore_map)
ckpt = tf.train.Checkpoint(**restore_map)
ckpt.restore(checkpoint_path).assert_existing_objects_matched()
restore_from_objects_dict = model.restore_from_objects(
fine_tune_checkpoint_type=checkpoint_type)
validate_tf_v2_checkpoint_restore_map(restore_from_objects_dict)
ckpt = tf.train.Checkpoint(**restore_from_objects_dict)
ckpt.restore(checkpoint_path).assert_existing_objects_matched()
def get_filepath(strategy, filepath):
......@@ -398,7 +413,7 @@ def train_loop(
train_steps=None,
use_tpu=False,
save_final_config=False,
checkpoint_every_n=1000,
checkpoint_every_n=5000,
checkpoint_max_to_keep=7,
**kwargs):
"""Trains a model using eager + functions.
......@@ -464,8 +479,10 @@ def train_loop(
if kwargs['use_bfloat16']:
tf.compat.v2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')
load_all_detection_checkpoint_vars = (
train_config.load_all_detection_checkpoint_vars)
if train_config.load_all_detection_checkpoint_vars:
raise ValueError('train_pb2.load_all_detection_checkpoint_vars '
'unsupported in TF2')
config_util.update_fine_tune_checkpoint_type(train_config)
fine_tune_checkpoint_type = train_config.fine_tune_checkpoint_type
fine_tune_checkpoint_version = train_config.fine_tune_checkpoint_version
......@@ -533,7 +550,6 @@ def train_loop(
train_config.fine_tune_checkpoint,
fine_tune_checkpoint_type,
fine_tune_checkpoint_version,
load_all_detection_checkpoint_vars,
train_input,
unpad_groundtruth_tensors)
......@@ -570,8 +586,12 @@ def train_loop(
def _sample_and_train(strategy, train_step_fn, data_iterator):
features, labels = data_iterator.next()
per_replica_losses = strategy.experimental_run_v2(
train_step_fn, args=(features, labels))
if hasattr(tf.distribute.Strategy, 'run'):
per_replica_losses = strategy.run(
train_step_fn, args=(features, labels))
else:
per_replica_losses = strategy.experimental_run_v2(
train_step_fn, args=(features, labels))
# TODO(anjalisridhar): explore if it is safe to remove the
## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
return strategy.reduce(tf.distribute.ReduceOp.SUM,
......@@ -744,28 +764,25 @@ def eager_eval_loop(
return eval_dict, losses_dict, class_agnostic
agnostic_categories = label_map_util.create_class_agnostic_category_index()
per_class_categories = label_map_util.create_category_index_from_labelmap(
eval_input_config.label_map_path)
keypoint_edges = [
(kp.start, kp.end) for kp in eval_config.keypoint_edge]
for i, (features, labels) in enumerate(eval_dataset):
eval_dict, losses_dict, class_agnostic = compute_eval_dict(features, labels)
if class_agnostic:
category_index = agnostic_categories
else:
category_index = per_class_categories
if i % 100 == 0:
tf.logging.info('Finished eval step %d', i)
use_original_images = fields.InputDataFields.original_image in features
if not use_tpu and use_original_images:
# Summary for input images.
tf.compat.v2.summary.image(
name='eval_input_images',
step=global_step,
data=eval_dict['original_image'],
max_outputs=1)
# Summary for prediction/groundtruth side-by-side images.
if class_agnostic:
category_index = label_map_util.create_class_agnostic_category_index()
else:
category_index = label_map_util.create_category_index_from_labelmap(
eval_input_config.label_map_path)
keypoint_edges = [
(kp.start, kp.end) for kp in eval_config.keypoint_edge]
if use_original_images and i < eval_config.num_visualizations:
sbys_image_list = vutils.draw_side_by_side_evaluation_image(
eval_dict,
category_index=category_index,
......@@ -775,10 +792,19 @@ def eager_eval_loop(
keypoint_edges=keypoint_edges or None)
sbys_images = tf.concat(sbys_image_list, axis=0)
tf.compat.v2.summary.image(
name='eval_side_by_side',
name='eval_side_by_side_' + str(i),
step=global_step,
data=sbys_images,
max_outputs=eval_config.num_visualizations)
if eval_util.has_densepose(eval_dict):
dp_image_list = vutils.draw_densepose_visualizations(
eval_dict)
dp_images = tf.concat(dp_image_list, axis=0)
tf.compat.v2.summary.image(
name='densepose_detections_' + str(i),
step=global_step,
data=dp_images,
max_outputs=eval_config.num_visualizations)
if evaluators is None:
if class_agnostic:
......@@ -807,8 +833,10 @@ def eager_eval_loop(
eval_metrics[loss_key] = loss_metrics[loss_key].result()
eval_metrics = {str(k): v for k, v in eval_metrics.items()}
tf.logging.info('Eval metrics at step %d', global_step)
for k in eval_metrics:
tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
tf.logging.info('\t+ %s: %f', k, eval_metrics[k])
return eval_metrics
......@@ -826,6 +854,7 @@ def eval_continuously(
checkpoint_dir=None,
wait_interval=180,
timeout=3600,
eval_index=None,
**kwargs):
"""Run continuous evaluation of a detection model eagerly.
......@@ -855,6 +884,8 @@ def eval_continuously(
new checkpoint.
timeout: The maximum number of seconds to wait for a checkpoint. Execution
will terminate if no new checkpoints are found after that many seconds.
eval_index: int, optional. If given, only evaluate the dataset at the given
index.
**kwargs: Additional keyword arguments for configuration override.
"""
......@@ -908,6 +939,11 @@ def eval_continuously(
model=detection_model)
eval_inputs.append((eval_input_config.name, next_eval_input))
if eval_index is not None:
eval_inputs = [eval_inputs[eval_index]]
tf.logging.info('eval_index selected - {}'.format(
eval_inputs))
global_step = tf.compat.v2.Variable(
0, trainable=False, dtype=tf.compat.v2.dtypes.int64)
......@@ -920,7 +956,7 @@ def eval_continuously(
for eval_name, eval_input in eval_inputs:
summary_writer = tf.compat.v2.summary.create_file_writer(
model_dir + '/eval' + eval_name)
os.path.join(model_dir, 'eval', eval_name))
with summary_writer.as_default():
eager_eval_loop(
detection_model,
......
......@@ -22,7 +22,6 @@ from absl import flags
import tensorflow.compat.v1 as tf
from object_detection import model_hparams
from object_detection import model_lib
flags.DEFINE_string(
......@@ -41,10 +40,6 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
'one of every n train input examples for evaluation, '
'where n is provided. This is only used if '
'`eval_training_data` is True.')
flags.DEFINE_string(
'hparams_overrides', None, 'Hyperparameter overrides, '
'represented as a string containing comma-separated '
'hparam_name=value pairs.')
flags.DEFINE_string(
'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
'`checkpoint_dir` is provided, this binary operates in eval-only mode, '
......@@ -68,7 +63,6 @@ def main(unused_argv):
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config=config,
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
......
......@@ -16,14 +16,6 @@
r"""Creates and runs TF2 object detection models.
##################################
NOTE: This module has not been fully tested; please bear with us while we iron
out the kinks.
##################################
When a TPU device is available, this binary uses TPUStrategy. Otherwise, it uses
GPUS with MirroredStrategy/MultiWorkerMirroredStrategy.
For local training/evaluation run:
PIPELINE_CONFIG_PATH=path/to/pipeline.config
MODEL_DIR=/tmp/model_outputs
......@@ -61,6 +53,12 @@ flags.DEFINE_string(
flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an '
'evaluation checkpoint before exiting.')
flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
flags.DEFINE_string(
'tpu_name',
default=None,
help='Name of the Cloud TPU for Cluster Resolvers.')
flags.DEFINE_integer(
'num_workers', 1, 'When num_workers > 1, training uses '
'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
......@@ -86,7 +84,10 @@ def main(unused_argv):
wait_interval=300, timeout=FLAGS.eval_timeout)
else:
if FLAGS.use_tpu:
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
# TPU is automatically inferred if tpu_name is None and
# we are running under cloud ai-platform.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
FLAGS.tpu_name)
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)
......
......@@ -26,18 +26,8 @@ from absl import flags
import tensorflow.compat.v1 as tf
from object_detection import model_hparams
from object_detection import model_lib
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver
from tensorflow.contrib import tpu as contrib_tpu
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')
# Cloud TPU Cluster Resolvers
......@@ -67,10 +57,6 @@ flags.DEFINE_string('mode', 'train',
flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
'this is not provided, batch size is read from training '
'config.')
flags.DEFINE_string(
'hparams_overrides', None, 'Comma-separated list of '
'hyperparameters to override defaults.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
......@@ -99,15 +85,15 @@ def main(unused_argv):
flags.mark_flag_as_required('pipeline_config_path')
tpu_cluster_resolver = (
contrib_cluster_resolver.TPUClusterResolver(
tf.distribute.cluster_resolver.TPUClusterResolver(
tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project))
tpu_grpc_url = tpu_cluster_resolver.get_master()
config = contrib_tpu.RunConfig(
config = tf.estimator.tpu.RunConfig(
master=tpu_grpc_url,
evaluation_master=tpu_grpc_url,
model_dir=FLAGS.model_dir,
tpu_config=contrib_tpu.TPUConfig(
tpu_config=tf.estimator.tpu.TPUConfig(
iterations_per_loop=FLAGS.iterations_per_loop,
num_shards=FLAGS.num_shards))
......@@ -117,7 +103,6 @@ def main(unused_argv):
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config=config,
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to generate bidirectional feature pyramids based on image features.
Provides bidirectional feature pyramid network (BiFPN) generators that can be
used to build object detection feature extractors, as proposed by Tan et al.
See https://arxiv.org/abs/1911.09070 for more details.
"""
import collections
import functools
from six.moves import range
from six.moves import zip
import tensorflow as tf
from object_detection.utils import bifpn_utils
def _create_bifpn_input_config(fpn_min_level,
fpn_max_level,
input_max_level,
level_scales=None):
"""Creates a BiFPN input config for the input levels from a backbone network.
Args:
fpn_min_level: the minimum pyramid level (highest feature map resolution) to
use in the BiFPN.
fpn_max_level: the maximum pyramid level (lowest feature map resolution) to
use in the BiFPN.
input_max_level: the maximum pyramid level that will be provided as input to
the BiFPN. Accordingly, the BiFPN will compute additional pyramid levels
from input_max_level, up to the desired fpn_max_level.
level_scales: a list of pyramid level scale factors. If 'None', each level's
scale is set to 2^level by default, which corresponds to each successive
feature map scaling by a factor of 2.
Returns:
A list of dictionaries for each feature map expected as input to the BiFPN,
where each has entries for the feature map 'name' and 'scale'.
"""
if not level_scales:
level_scales = [2**i for i in range(fpn_min_level, fpn_max_level + 1)]
bifpn_input_params = []
for i in range(fpn_min_level, min(fpn_max_level, input_max_level) + 1):
bifpn_input_params.append({
'name': '0_up_lvl_{}'.format(i),
'scale': level_scales[i - fpn_min_level]
})
return bifpn_input_params
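# Illustrative sketch (not part of the module) of the return value for the
# levels used by the tests below (fpn_min_level=3, fpn_max_level=7,
# input_max_level=5); only levels up to input_max_level appear here, and the
# higher levels are added later as BiFPN nodes:
#   _create_bifpn_input_config(fpn_min_level=3, fpn_max_level=7,
#                              input_max_level=5)
#   => [{'name': '0_up_lvl_3', 'scale': 8},
#       {'name': '0_up_lvl_4', 'scale': 16},
#       {'name': '0_up_lvl_5', 'scale': 32}]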
def _get_bifpn_output_node_names(fpn_min_level, fpn_max_level, node_config):
"""Returns a list of BiFPN output node names, given a BiFPN node config.
Args:
fpn_min_level: the minimum pyramid level (highest feature map resolution)
used by the BiFPN.
fpn_max_level: the maximum pyramid level (lowest feature map resolution)
used by the BiFPN.
node_config: the BiFPN node_config, a list of dictionaries corresponding to
each node in the BiFPN computation graph, where each entry should have an
associated 'name'.
Returns:
A list of strings corresponding to the names of the output BiFPN nodes.
"""
num_output_nodes = fpn_max_level - fpn_min_level + 1
return [node['name'] for node in node_config[-num_output_nodes:]]
def _create_bifpn_node_config(bifpn_num_iterations,
bifpn_num_filters,
fpn_min_level,
fpn_max_level,
input_max_level,
bifpn_node_params=None,
level_scales=None):
"""Creates a config specifying a bidirectional feature pyramid network.
Args:
bifpn_num_iterations: the number of top-down bottom-up feature computations
to repeat in the BiFPN.
bifpn_num_filters: the number of filters (channels) for every feature map
used in the BiFPN.
fpn_min_level: the minimum pyramid level (highest feature map resolution) to
use in the BiFPN.
fpn_max_level: the maximum pyramid level (lowest feature map resolution) to
use in the BiFPN.
input_max_level: the maximum pyramid level that will be provided as input to
the BiFPN. Accordingly, the BiFPN will compute additional pyramid levels
from input_max_level, up to the desired fpn_max_level.
bifpn_node_params: If not 'None', a dictionary of additional default BiFPN
node parameters that will be applied to all BiFPN nodes.
level_scales: a list of pyramid level scale factors. If 'None', each level's
scale is set to 2^level by default, which corresponds to each successive
feature map scaling by a factor of 2.
Returns:
A list of dictionaries used to define nodes in the BiFPN computation graph,
as proposed by EfficientDet, Tan et al (https://arxiv.org/abs/1911.09070).
Each node's entry has the corresponding keys:
name: String. The name of this node in the BiFPN. The node name follows
the format '{bifpn_iteration}_{dn|up}_lvl_{pyramid_level}', where 'dn'
or 'up' refers to whether the node is in the top-down or bottom-up
portion of a single BiFPN iteration.
scale: the scale factor for this node, by default 2^level.
inputs: A list of names of nodes which are inputs to this node.
num_channels: The number of channels for this node.
combine_method: String. Name of the method used to combine input
node feature maps, 'fast_attention' by default for nodes which have more
than one input. Otherwise, 'None' for nodes with only one input node.
input_op: A (partial) function which is called to construct the layers
that will be applied to this BiFPN node's inputs. This function is
called with the arguments:
input_op(name, input_scale, input_num_channels, output_scale,
output_num_channels, conv_hyperparams, is_training,
freeze_batchnorm)
post_combine_op: A (partial) function which is called to construct the
layers that will be applied to the result of the combine operation for
this BiFPN node. This function will be called with the arguments:
post_combine_op(name, conv_hyperparams, is_training, freeze_batchnorm)
If 'None', then no layers will be applied after the combine operation
for this node.
"""
if not level_scales:
level_scales = [2**i for i in range(fpn_min_level, fpn_max_level + 1)]
default_node_params = {
'num_channels':
bifpn_num_filters,
'combine_method':
'fast_attention',
'input_op':
functools.partial(
_create_bifpn_resample_block, downsample_method='max_pooling'),
'post_combine_op':
functools.partial(
bifpn_utils.create_conv_block,
num_filters=bifpn_num_filters,
kernel_size=3,
strides=1,
padding='SAME',
use_separable=True,
apply_batchnorm=True,
apply_activation=True,
conv_bn_act_pattern=False),
}
if bifpn_node_params:
default_node_params.update(bifpn_node_params)
bifpn_node_params = []
# Create additional base pyramid levels not provided as input to the BiFPN.
# Note, combine_method and post_combine_op are set to None for additional
# base pyramid levels because they do not combine multiple input BiFPN nodes.
for i in range(input_max_level + 1, fpn_max_level + 1):
node_params = dict(default_node_params)
node_params.update({
'name': '0_up_lvl_{}'.format(i),
'scale': level_scales[i - fpn_min_level],
'inputs': ['0_up_lvl_{}'.format(i - 1)],
'combine_method': None,
'post_combine_op': None,
})
bifpn_node_params.append(node_params)
for i in range(bifpn_num_iterations):
# The first bottom-up feature pyramid (which includes the input pyramid
# levels from the backbone network and the additional base pyramid levels)
# is indexed at 0. So, the first top-down bottom-up pass of the BiFPN is
# indexed from 1, and repeated for bifpn_num_iterations iterations.
bifpn_i = i + 1
# Create top-down nodes.
for level_i in reversed(range(fpn_min_level, fpn_max_level)):
inputs = []
# BiFPN nodes in the top-down pass receive input from the corresponding
# level from the previous BiFPN iteration's bottom-up pass, except for the
# bottom-most (min) level node, which is computed once in the initial
# bottom-up pass, and is afterwards only computed in each top-down pass.
if level_i > fpn_min_level or bifpn_i == 1:
inputs.append('{}_up_lvl_{}'.format(bifpn_i - 1, level_i))
else:
inputs.append('{}_dn_lvl_{}'.format(bifpn_i - 1, level_i))
inputs.append(bifpn_node_params[-1]['name'])
node_params = dict(default_node_params)
node_params.update({
'name': '{}_dn_lvl_{}'.format(bifpn_i, level_i),
'scale': level_scales[level_i - fpn_min_level],
'inputs': inputs
})
bifpn_node_params.append(node_params)
# Create bottom-up nodes.
for level_i in range(fpn_min_level + 1, fpn_max_level + 1):
# BiFPN nodes in the bottom-up pass receive input from the corresponding
# level from the preceding top-down pass, except for the top (max) level
# which does not have a corresponding node in the top-down pass.
inputs = ['{}_up_lvl_{}'.format(bifpn_i - 1, level_i)]
if level_i < fpn_max_level:
inputs.append('{}_dn_lvl_{}'.format(bifpn_i, level_i))
inputs.append(bifpn_node_params[-1]['name'])
node_params = dict(default_node_params)
node_params.update({
'name': '{}_up_lvl_{}'.format(bifpn_i, level_i),
'scale': level_scales[level_i - fpn_min_level],
'inputs': inputs
})
bifpn_node_params.append(node_params)
return bifpn_node_params
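# Illustrative sketch (not part of the module) of the node names this config
# produces for the settings used by the tests below (bifpn_num_iterations=2,
# bifpn_num_filters=128, fpn_min_level=3, fpn_max_level=7, input_max_level=5):
#   node_config = _create_bifpn_node_config(2, 128, 3, 7, 5)
#   [node['name'] for node in node_config]
#   => ['0_up_lvl_6', '0_up_lvl_7',
#       '1_dn_lvl_6', '1_dn_lvl_5', '1_dn_lvl_4', '1_dn_lvl_3',
#       '1_up_lvl_4', '1_up_lvl_5', '1_up_lvl_6', '1_up_lvl_7',
#       '2_dn_lvl_6', '2_dn_lvl_5', '2_dn_lvl_4', '2_dn_lvl_3',
#       '2_up_lvl_4', '2_up_lvl_5', '2_up_lvl_6', '2_up_lvl_7']
# The last fpn_max_level - fpn_min_level + 1 names ('2_dn_lvl_3' through
# '2_up_lvl_7') are the BiFPN outputs returned by _get_bifpn_output_node_names.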
def _create_bifpn_resample_block(name,
input_scale,
input_num_channels,
output_scale,
output_num_channels,
conv_hyperparams,
is_training,
freeze_batchnorm,
downsample_method=None,
use_native_resize_op=False,
maybe_apply_1x1_conv=True,
apply_1x1_pre_sampling=True,
apply_1x1_post_sampling=False):
"""Creates resample block layers for input feature maps to BiFPN nodes.
Args:
name: String. Name used for this block of layers.
input_scale: Scale factor of the input feature map.
input_num_channels: Number of channels in the input feature map.
output_scale: Scale factor of the output feature map.
output_num_channels: Number of channels in the output feature map.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops.
is_training: Indicates whether the feature generator is in training mode.
freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
downsample_method: String. Method to use when downsampling feature maps.
use_native_resize_op: Bool. Whether to use the native resize op when
upsampling feature maps.
maybe_apply_1x1_conv: Bool. If 'True', a 1x1 convolution will only be
applied if the input_num_channels differs from the output_num_channels.
apply_1x1_pre_sampling: Bool. Whether a 1x1 convolution will be applied to
the input feature map before the up/down-sampling operation.
apply_1x1_post_sampling: Bool. Whether a 1x1 convolution will be applied to
the input feature map after the up/down-sampling operation.
Returns:
A list of layers which may be applied to the input feature maps in order to
compute feature maps with the specified scale and number of channels.
"""
# By default, 1x1 convolutions are only applied before sampling when the
# number of input and output channels differ.
if maybe_apply_1x1_conv and output_num_channels == input_num_channels:
apply_1x1_pre_sampling = False
apply_1x1_post_sampling = False
apply_bn_for_resampling = True
layers = []
if apply_1x1_pre_sampling:
layers.extend(
bifpn_utils.create_conv_block(
name=name + '1x1_pre_sample/',
num_filters=output_num_channels,
kernel_size=1,
strides=1,
padding='SAME',
use_separable=False,
apply_batchnorm=apply_bn_for_resampling,
apply_activation=False,
conv_hyperparams=conv_hyperparams,
is_training=is_training,
freeze_batchnorm=freeze_batchnorm))
layers.extend(
bifpn_utils.create_resample_feature_map_ops(input_scale, output_scale,
downsample_method,
use_native_resize_op,
conv_hyperparams, is_training,
freeze_batchnorm, name))
if apply_1x1_post_sampling:
layers.extend(
bifpn_utils.create_conv_block(
name=name + '1x1_post_sample/',
num_filters=output_num_channels,
kernel_size=1,
strides=1,
padding='SAME',
use_separable=False,
apply_batchnorm=apply_bn_for_resampling,
apply_activation=False,
conv_hyperparams=conv_hyperparams,
is_training=is_training,
freeze_batchnorm=freeze_batchnorm))
return layers
def _create_bifpn_combine_op(num_inputs, name, combine_method):
"""Creates a BiFPN output config, a list of the output BiFPN node names.
Args:
num_inputs: The number of inputs to this combine operation.
name: String. The name of this combine operation.
combine_method: String. The method used to combine input feature maps.
Returns:
A function which may be called with a list of num_inputs feature maps
and which will return a single feature map.
"""
combine_op = None
if num_inputs < 1:
raise ValueError('Expected at least 1 input for BiFPN combine.')
elif num_inputs == 1:
combine_op = lambda x: x[0]
else:
combine_op = bifpn_utils.BiFPNCombineLayer(
combine_method=combine_method, name=name)
return combine_op
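# Illustrative sketch (not part of the module): with one input the combine op
# is an identity on the single element, and with several inputs it is a
# BiFPNCombineLayer that is called on a list of same-shaped feature maps:
#   combine = _create_bifpn_combine_op(num_inputs=2, name='1_dn_lvl_6/combine',
#                                      combine_method='fast_attention')
#   fused = combine([feature_map_a, feature_map_b])  # placeholder tensors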
class KerasBiFpnFeatureMaps(tf.keras.Model):
"""Generates Keras based BiFPN feature maps from an input feature map pyramid.
A Keras model that generates multi-scale feature maps for detection by
iteratively computing top-down and bottom-up feature pyramids, as in the
EfficientDet paper by Tan et al, see arxiv.org/abs/1911.09070 for details.
"""
def __init__(self,
bifpn_num_iterations,
bifpn_num_filters,
fpn_min_level,
fpn_max_level,
input_max_level,
is_training,
conv_hyperparams,
freeze_batchnorm,
bifpn_node_params=None,
name=None):
"""Constructor.
Args:
bifpn_num_iterations: The number of top-down bottom-up iterations.
bifpn_num_filters: The number of filters (channels) to be used for all
feature maps in this BiFPN.
fpn_min_level: The minimum pyramid level (highest feature map resolution)
to use in the BiFPN.
fpn_max_level: The maximum pyramid level (lowest feature map resolution)
to use in the BiFPN.
input_max_level: The maximum pyramid level that will be provided as input
to the BiFPN. Accordingly, the BiFPN will compute any additional pyramid
levels from input_max_level up to the desired fpn_max_level, with each
successive level downsampling by a scale factor of 2 by default.
is_training: Indicates whether the feature generator is in training mode.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops.
freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
bifpn_node_params: An optional dictionary that may be used to specify
default parameters for BiFPN nodes, without the need to provide a custom
bifpn_node_config. For example, if '{ combine_method: 'sum' }', then all
BiFPN nodes will combine input feature maps by summation, rather than
by the default fast attention method.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
super(KerasBiFpnFeatureMaps, self).__init__(name=name)
bifpn_node_config = _create_bifpn_node_config(
bifpn_num_iterations, bifpn_num_filters, fpn_min_level, fpn_max_level,
input_max_level, bifpn_node_params)
bifpn_input_config = _create_bifpn_input_config(
fpn_min_level, fpn_max_level, input_max_level)
bifpn_output_node_names = _get_bifpn_output_node_names(
fpn_min_level, fpn_max_level, bifpn_node_config)
self.bifpn_node_config = bifpn_node_config
self.bifpn_output_node_names = bifpn_output_node_names
self.node_input_blocks = []
self.node_combine_op = []
self.node_post_combine_block = []
all_node_params = bifpn_input_config
all_node_names = [node['name'] for node in all_node_params]
for node_config in bifpn_node_config:
# Maybe transform and/or resample input feature maps.
input_blocks = []
for input_name in node_config['inputs']:
if input_name not in all_node_names:
raise ValueError(
'Input feature map ({}) does not exist.'.format(input_name))
input_index = all_node_names.index(input_name)
input_params = all_node_params[input_index]
input_block = node_config['input_op'](
name='{}/input_{}/'.format(node_config['name'], input_name),
input_scale=input_params['scale'],
input_num_channels=input_params.get('num_channels', None),
output_scale=node_config['scale'],
output_num_channels=node_config['num_channels'],
conv_hyperparams=conv_hyperparams,
is_training=is_training,
freeze_batchnorm=freeze_batchnorm)
input_blocks.append((input_index, input_block))
# Combine input feature maps.
combine_op = _create_bifpn_combine_op(
num_inputs=len(input_blocks),
name=(node_config['name'] + '/combine'),
combine_method=node_config['combine_method'])
# Post-combine layers.
post_combine_block = []
if node_config['post_combine_op']:
post_combine_block.extend(node_config['post_combine_op'](
name=node_config['name'] + '/post_combine/',
conv_hyperparams=conv_hyperparams,
is_training=is_training,
freeze_batchnorm=freeze_batchnorm))
self.node_input_blocks.append(input_blocks)
self.node_combine_op.append(combine_op)
self.node_post_combine_block.append(post_combine_block)
all_node_params.append(node_config)
all_node_names.append(node_config['name'])
def call(self, feature_pyramid):
"""Compute BiFPN feature maps from input feature pyramid.
Executed when calling the `.__call__` method on input.
Args:
feature_pyramid: list of tuples of (tensor_name, image_feature_tensor).
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
"""
feature_maps = [el[1] for el in feature_pyramid]
output_feature_maps = [None for node in self.bifpn_output_node_names]
for index, node in enumerate(self.bifpn_node_config):
node_scope = 'node_{:02d}'.format(index)
with tf.name_scope(node_scope):
# Apply layer blocks to this node's input feature maps.
input_block_results = []
for input_index, input_block in self.node_input_blocks[index]:
block_result = feature_maps[input_index]
for layer in input_block:
block_result = layer(block_result)
input_block_results.append(block_result)
# Combine the resulting feature maps.
node_result = self.node_combine_op[index](input_block_results)
# Apply post-combine layer block if applicable.
for layer in self.node_post_combine_block[index]:
node_result = layer(node_result)
feature_maps.append(node_result)
if node['name'] in self.bifpn_output_node_names:
index = self.bifpn_output_node_names.index(node['name'])
output_feature_maps[index] = node_result
return collections.OrderedDict(
zip(self.bifpn_output_node_names, output_feature_maps))
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for bidirectional feature pyramid generators."""
import unittest
from absl.testing import parameterized
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import bidirectional_feature_pyramid_generators as bifpn_generators
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
from object_detection.utils import test_utils
from object_detection.utils import tf_version
@parameterized.parameters({'bifpn_num_iterations': 2},
{'bifpn_num_iterations': 8})
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class BiFPNFeaturePyramidGeneratorTest(test_case.TestCase):
def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
force_use_bias: true
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def test_get_expected_feature_map_shapes(self, bifpn_num_iterations):
with test_utils.GraphContextOrNone() as g:
image_features = [
('block3', tf.random_uniform([4, 16, 16, 256], dtype=tf.float32)),
('block4', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
('block5', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32))
]
bifpn_generator = bifpn_generators.KerasBiFpnFeatureMaps(
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=128,
fpn_min_level=3,
fpn_max_level=7,
input_max_level=5,
is_training=True,
conv_hyperparams=self._build_conv_hyperparams(),
freeze_batchnorm=False)
def graph_fn():
feature_maps = bifpn_generator(image_features)
return feature_maps
expected_feature_map_shapes = {
'{}_dn_lvl_3'.format(bifpn_num_iterations): (4, 16, 16, 128),
'{}_up_lvl_4'.format(bifpn_num_iterations): (4, 8, 8, 128),
'{}_up_lvl_5'.format(bifpn_num_iterations): (4, 4, 4, 128),
'{}_up_lvl_6'.format(bifpn_num_iterations): (4, 2, 2, 128),
'{}_up_lvl_7'.format(bifpn_num_iterations): (4, 1, 1, 128)}
out_feature_maps = self.execute(graph_fn, [], g)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_variable_names(self, bifpn_num_iterations):
with test_utils.GraphContextOrNone() as g:
image_features = [
('block3', tf.random_uniform([4, 16, 16, 256], dtype=tf.float32)),
('block4', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
('block5', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32))
]
bifpn_generator = bifpn_generators.KerasBiFpnFeatureMaps(
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=128,
fpn_min_level=3,
fpn_max_level=7,
input_max_level=5,
is_training=True,
conv_hyperparams=self._build_conv_hyperparams(),
freeze_batchnorm=False,
name='bifpn')
def graph_fn():
return bifpn_generator(image_features)
self.execute(graph_fn, [], g)
expected_variables = [
'bifpn/node_00/0_up_lvl_6/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
'bifpn/node_00/0_up_lvl_6/input_0_up_lvl_5/1x1_pre_sample/conv/kernel',
'bifpn/node_03/1_dn_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
'bifpn/node_03/1_dn_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/kernel',
'bifpn/node_04/1_dn_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/bias',
'bifpn/node_04/1_dn_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/kernel',
'bifpn/node_05/1_dn_lvl_3/input_0_up_lvl_3/1x1_pre_sample/conv/bias',
'bifpn/node_05/1_dn_lvl_3/input_0_up_lvl_3/1x1_pre_sample/conv/kernel',
'bifpn/node_06/1_up_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/bias',
'bifpn/node_06/1_up_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/kernel',
'bifpn/node_07/1_up_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
'bifpn/node_07/1_up_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/kernel']
expected_node_variable_patterns = [
['bifpn/node_{:02}/{}_dn_lvl_6/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_dn_lvl_5/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_dn_lvl_4/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_dn_lvl_3/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_up_lvl_4/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_up_lvl_5/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_up_lvl_6/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_up_lvl_7/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/pointwise_kernel']]
node_i = 2
for iter_i in range(1, bifpn_num_iterations+1):
for node_variable_patterns in expected_node_variable_patterns:
for pattern in node_variable_patterns:
expected_variables.append(pattern.format(node_i, iter_i))
node_i += 1
expected_variables = set(expected_variables)
actual_variable_set = set(
[var.name.split(':')[0] for var in bifpn_generator.variables])
self.assertSetEqual(expected_variables, actual_variable_set)
# TODO(aom): Tests for create_bifpn_combine_op.
if __name__ == '__main__':
tf.test.main()