Unverified Commit 36e786dc authored by vivek rathod's avatar vivek rathod Committed by GitHub
Browse files

Merged commit includes the following changes: (#8779)



319539052  by rathodv:

    Internal Change.

--
319537186  by rathodv:

    Internal Change

--
319320800  by jonathanhuang:

    Internal changes.

--
319260368  by ronnyvotel:

    Adding a target assigner for DensePose.

--
319240476  by sbeery:

    Switching to main() for argparse.

--

PiperOrigin-RevId: 319539052
Co-authored-by: default avatarTF Object Detection Team <no-reply@google.com>
parent 523f5e05
......@@ -110,11 +110,11 @@ if tf_version.is_tf2():
frcnn_resnet_keras.FasterRCNNResnet152KerasFeatureExtractor,
'faster_rcnn_inception_resnet_v2_keras':
frcnn_inc_res_keras.FasterRCNNInceptionResnetV2KerasFeatureExtractor,
'fasret_rcnn_resnet50_fpn_keras':
'faster_rcnn_resnet50_fpn_keras':
frcnn_resnet_fpn_keras.FasterRCNNResnet50FpnKerasFeatureExtractor,
'fasret_rcnn_resnet101_fpn_keras':
'faster_rcnn_resnet101_fpn_keras':
frcnn_resnet_fpn_keras.FasterRCNNResnet101FpnKerasFeatureExtractor,
'fasret_rcnn_resnet152_fpn_keras':
'faster_rcnn_resnet152_fpn_keras':
frcnn_resnet_fpn_keras.FasterRCNNResnet152FpnKerasFeatureExtractor,
}
......
......@@ -45,6 +45,7 @@ from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import densepose_ops
from object_detection.core import keypoint_ops
from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
......@@ -799,17 +800,15 @@ def get_batch_predictions_from_indices(batch_predictions, indices):
function.
Args:
batch_predictions: A tensor of shape [batch_size, height, width, 2] for
single class offsets and [batch_size, height, width, class, 2] for
multiple classes offsets (e.g. keypoint joint offsets) representing the
(height, width) or (y_offset, x_offset) predictions over a batch.
indices: A tensor of shape [num_instances, 3] for single class offset and
[num_instances, 4] for multiple classes offsets representing the indices
in the batch to be penalized in a loss function
batch_predictions: A tensor of shape [batch_size, height, width, channels]
or [batch_size, height, width, class, channels] for class-specific
features (e.g. keypoint joint offsets).
indices: A tensor of shape [num_instances, 3] for single class features or
[num_instances, 4] for multiple classes features.
Returns:
values: A tensor of shape [num_instances, 2] holding the predicted values
at the given indices.
values: A tensor of shape [num_instances, channels] holding the predicted
values at the given indices.
"""
return tf.gather_nd(batch_predictions, indices)
......@@ -1657,3 +1656,118 @@ class CenterNetMaskTargetAssigner(object):
segmentation_target = tf.stack(segmentation_targets_list, axis=0)
return segmentation_target
class CenterNetDensePoseTargetAssigner(object):
  """Wrapper to compute targets for DensePose task."""

  def __init__(self, stride, num_parts=24):
    """Initializes the DensePose target assigner.

    Args:
      stride: int, stride of the CenterNet output. Groundtruth coordinates
        (given relative to the input image) are mapped into the output space
        by dividing height/width by this stride.
      num_parts: int, number of DensePose body parts; used as the depth of
        the one-hot part-id encoding. Defaults to 24 (the standard DensePose
        part count).
    """
    self._stride = stride
    self._num_parts = num_parts

  def assign_part_and_coordinate_targets(self,
                                         height,
                                         width,
                                         gt_dp_num_points_list,
                                         gt_dp_part_ids_list,
                                         gt_dp_surface_coords_list,
                                         gt_weights_list=None):
    """Returns the DensePose part_id and coordinate targets and their indices.

    The returned values are expected to be used with predicted tensors
    of size (batch_size, height//self._stride, width//self._stride, 2). The
    predicted values at the relevant indices can be retrieved with the
    get_batch_predictions_from_indices function.

    Args:
      height: int, height of input to the model. This is used to determine the
        height of the output.
      width: int, width of the input to the model. This is used to determine
        the width of the output.
      gt_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
        [num_boxes] containing the number of DensePose sampled points per box.
      gt_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
        [num_boxes, max_sampled_points] containing the DensePose part ids
        (0-indexed) for each sampled point. Note that there may be padding, as
        boxes may contain a different number of sampled points.
      gt_dp_surface_coords_list: a list of 3-D tf.float32 tensors of shape
        [num_boxes, max_sampled_points, 4] containing the DensePose surface
        coordinates (normalized) for each sampled point. Note that there may
        be padding.
      gt_weights_list: A list of 1-D tensors with shape [num_boxes]
        corresponding to the weight of each groundtruth detection box.

    Returns:
      batch_indices: an integer tensor of shape [num_total_points, 4] holding
        the indices inside the predicted tensor which should be penalized. The
        first column indicates the index along the batch dimension and the
        second and third columns indicate the index along the y and x
        dimensions respectively. The fourth column is the part index.
      batch_part_ids: an int tensor of shape [num_total_points, num_parts]
        holding 1-hot encodings of parts for each sampled point.
      batch_surface_coords: a float tensor of shape [num_total_points, 2]
        holding the expected (v, u) coordinates for each sampled point.
      batch_weights: a float tensor of shape [num_total_points] indicating the
        weight of each prediction.
      Note that num_total_points = batch_size * num_boxes * max_sampled_points.
    """
    if gt_weights_list is None:
      # No weights provided for any example; per-box ones are substituted
      # inside the loop below.
      gt_weights_list = [None] * len(gt_dp_num_points_list)

    batch_indices = []
    batch_part_ids = []
    batch_surface_coords = []
    batch_weights = []
    # Each list element corresponds to one example in the batch; the loop
    # index `i` therefore becomes the batch index in the output indices.
    for i, (num_points, part_ids, surface_coords, weights) in enumerate(
        zip(gt_dp_num_points_list, gt_dp_part_ids_list,
            gt_dp_surface_coords_list, gt_weights_list)):
      num_boxes, max_sampled_points = (
          shape_utils.combined_static_and_dynamic_shape(part_ids))
      # Flatten boxes x points into one axis so all per-point outputs align.
      part_ids_flattened = tf.reshape(part_ids, [-1])
      part_ids_one_hot = tf.one_hot(part_ids_flattened, depth=self._num_parts)
      # Get DensePose coordinates in the output space.
      surface_coords_abs = densepose_ops.to_absolute_coordinates(
          surface_coords, height // self._stride, width // self._stride)
      surface_coords_abs = tf.reshape(surface_coords_abs, [-1, 4])
      # Each tensor has shape [num_boxes * max_sampled_points].
      # (yabs, xabs) are absolute output-space locations; (v, u) are the
      # surface-coordinate regression targets.
      yabs, xabs, v, u = tf.unstack(surface_coords_abs, axis=-1)
      # Get the indices (in output space) for the DensePose coordinates. Note
      # that if self._stride is larger than 1, this will have the effect of
      # reducing spatial resolution of the groundtruth points.
      indices_y = tf.cast(yabs, tf.int32)
      indices_x = tf.cast(xabs, tf.int32)
      # Assign ones if weights are not provided.
      if weights is None:
        weights = tf.ones(num_boxes, dtype=tf.float32)
      # Create per-point weights by broadcasting each box weight to all of
      # that box's sampled points.
      weights_per_point = tf.reshape(
          tf.tile(weights[:, tf.newaxis], multiples=[1, max_sampled_points]),
          shape=[-1])
      # Mask out invalid (i.e. padded) DensePose points: a point at position
      # j within a box is valid only when j < num_points for that box.
      num_points_tiled = tf.tile(num_points[:, tf.newaxis],
                                 multiples=[1, max_sampled_points])
      range_tiled = tf.tile(tf.range(max_sampled_points)[tf.newaxis, :],
                            multiples=[num_boxes, 1])
      valid_points = tf.math.less(range_tiled, num_points_tiled)
      valid_points = tf.cast(tf.reshape(valid_points, [-1]), dtype=tf.float32)
      weights_per_point = weights_per_point * valid_points
      # Shape of [num_boxes * max_sampled_points] integer tensor filled with
      # current batch index.
      batch_index = i * tf.ones_like(indices_y, dtype=tf.int32)
      batch_indices.append(
          tf.stack([batch_index, indices_y, indices_x, part_ids_flattened],
                   axis=1))
      batch_part_ids.append(part_ids_one_hot)
      batch_surface_coords.append(tf.stack([v, u], axis=1))
      batch_weights.append(weights_per_point)

    # Concatenate per-example results along the point dimension.
    batch_indices = tf.concat(batch_indices, axis=0)
    batch_part_ids = tf.concat(batch_part_ids, axis=0)
    batch_surface_coords = tf.concat(batch_surface_coords, axis=0)
    batch_weights = tf.concat(batch_weights, axis=0)
    return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
......@@ -1906,6 +1906,99 @@ class CenterNetMaskTargetAssignerTest(test_case.TestCase):
expected_seg_target, segmentation_target)
class CenterNetDensePoseTargetAssignerTest(test_case.TestCase):
  """Tests for CenterNetDensePoseTargetAssigner."""

  def test_assign_part_and_coordinate_targets(self):
    """Checks indices, one-hot part ids, (v, u) coords and weights.

    Uses a batch of two examples with padded DensePose points (via
    gt_dp_num_points_list) and per-box weights, including a zero weight.
    """
    def graph_fn():
      # Number of valid (non-padded) DensePose points per box.
      gt_dp_num_points_list = [
          # Example 0.
          tf.constant([2, 0, 3], dtype=tf.int32),
          # Example 1.
          tf.constant([1, 1], dtype=tf.int32),
      ]
      gt_dp_part_ids_list = [
          # Example 0.
          tf.constant([[1, 6, 0],
                       [0, 0, 0],
                       [0, 2, 3]], dtype=tf.int32),
          # Example 1.
          tf.constant([[7, 0, 0],
                       [0, 0, 0]], dtype=tf.int32),
      ]
      # Each point is (y, x, v, u) with y, x normalized to the input image.
      gt_dp_surface_coords_list = [
          # Example 0.
          tf.constant(
              [[[0.11, 0.2, 0.3, 0.4],  # Box 0.
                [0.6, 0.4, 0.1, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.0, 0.0, 0.0, 0.0],  # Box 1.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.22, 0.1, 0.6, 0.8],  # Box 2.
                [0.0, 0.4, 0.5, 1.0],
                [0.3, 0.2, 0.4, 0.1]]],
              dtype=tf.float32),
          # Example 1.
          tf.constant(
              [[[0.5, 0.5, 0.3, 1.0],  # Box 0.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.2, 0.2, 0.5, 0.8],  # Box 1.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]]],
              dtype=tf.float32),
      ]
      gt_weights_list = [
          # Example 0.
          tf.constant([1.0, 1.0, 0.5], dtype=tf.float32),
          # Example 1.
          tf.constant([0.0, 1.0], dtype=tf.float32),
      ]
      cn_assigner = targetassigner.CenterNetDensePoseTargetAssigner(stride=4)
      batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
          cn_assigner.assign_part_and_coordinate_targets(
              height=120,
              width=80,
              gt_dp_num_points_list=gt_dp_num_points_list,
              gt_dp_part_ids_list=gt_dp_part_ids_list,
              gt_dp_surface_coords_list=gt_dp_surface_coords_list,
              gt_weights_list=gt_weights_list))
      return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
    batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
        self.execute(graph_fn, []))

    # Each row is [batch_index, y, x, part_id] in the stride-4 output space.
    expected_batch_indices = np.array([
        # Example 0. e.g.
        # The first set of indices is calculated as follows:
        # floor(0.11*120/4) = 3, floor(0.2*80/4) = 4.
        [0, 3, 4, 1], [0, 18, 8, 6], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
        [0, 0, 0, 0], [0, 6, 2, 0], [0, 0, 8, 2], [0, 9, 4, 3],
        # Example 1.
        [1, 15, 10, 7], [1, 0, 0, 0], [1, 0, 0, 0], [1, 6, 4, 0], [1, 0, 0, 0],
        [1, 0, 0, 0]
    ], dtype=np.int32)
    expected_batch_part_ids = tf.one_hot(
        [1, 6, 0, 0, 0, 0, 0, 2, 3, 7, 0, 0, 0, 0, 0], depth=24).numpy()
    expected_batch_surface_coords = np.array([
        # Box 0.
        [0.3, 0.4], [0.1, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
        [0.6, 0.8], [0.5, 1.0], [0.4, 0.1],
        # Box 1.
        [0.3, 1.0], [0.0, 0.0], [0.0, 0.0], [0.5, 0.8], [0.0, 0.0], [0.0, 0.0],
    ], np.float32)
    # Padded points get weight 0; box 2 in example 0 carries its 0.5 weight,
    # and box 0 in example 1 was explicitly given weight 0.
    expected_batch_weights = np.array([
        # Box 0.
        1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5,
        # Box 1.
        0.0, 0.0, 0.0, 1.0, 0.0, 0.0
    ], dtype=np.float32)
    self.assertAllEqual(expected_batch_indices, batch_indices)
    self.assertAllEqual(expected_batch_part_ids, batch_part_ids)
    self.assertAllClose(expected_batch_surface_coords, batch_surface_coords)
    self.assertAllClose(expected_batch_weights, batch_weights)
if __name__ == '__main__':
  # Eager/TF2 behavior is required because the tests call .numpy() on tensors.
  tf.enable_v2_behavior()
  tf.test.main()
......@@ -51,7 +51,6 @@ import itertools
import json
import os
from absl import app
import apache_beam as beam
import numpy as np
import PIL.Image
......@@ -932,4 +931,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
app.run(main)
main()
......@@ -39,7 +39,6 @@ import io
import json
import logging
import os
from absl import app
import apache_beam as beam
import numpy as np
import PIL.Image
......@@ -338,4 +337,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
app.run(main)
main()
......@@ -48,7 +48,6 @@ from __future__ import print_function
import argparse
import os
import threading
from absl import app
import apache_beam as beam
import tensorflow.compat.v1 as tf
......@@ -290,4 +289,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
app.run(main)
main()
......@@ -52,8 +52,6 @@ import datetime
import os
import threading
from absl import app
import apache_beam as beam
import numpy as np
import six
......@@ -410,5 +408,7 @@ def main(argv=None, save_main_session=True):
p.run()
if __name__ == '__main__':
app.run(main)
main()
......@@ -2547,7 +2547,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
if second_stage_mask_loss is not None:
mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
second_stage_mask_loss, name='mask_loss')
loss_dict[mask_loss.op.name] = mask_loss
loss_dict['Loss/BoxClassifierLoss/mask_loss'] = mask_loss
return loss_dict
def _get_mask_proposal_boxes_and_classes(
......
......@@ -56,7 +56,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
the resnet_v1.resnet_v1_{50,101,152} models.
resnet_v1_base_model_name: model name under which to construct resnet v1.
first_stage_features_stride: See base class.
conv_hyperparameters: a `hyperparams_builder.KerasLayerHyperparams` object
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
batch_norm_trainable: See base class.
......@@ -143,19 +143,21 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
with tf.name_scope('ResnetV1FPN'):
full_resnet_v1_model = self._resnet_v1_base_model(
batchnorm_training=self._train_batch_norm,
conv_hyperparams=(self._conv_hyperparams
if self._override_base_feature_extractor_hyperparams
conv_hyperparams=(self._conv_hyperparams if
self._override_base_feature_extractor_hyperparams
else None),
classes=None,
weights=None,
include_top=False)
output_layers = _RESNET_MODEL_OUTPUT_LAYERS[self._resnet_v1_base_model_name]
output_layers = _RESNET_MODEL_OUTPUT_LAYERS[
self._resnet_v1_base_model_name]
outputs = [full_resnet_v1_model.get_layer(output_layer_name).output
for output_layer_name in output_layers]
self.classification_backbone = tf.keras.Model(
inputs=full_resnet_v1_model.inputs,
outputs=outputs)
backbone_outputs = self.classification_backbone(full_resnet_v1_model.inputs)
backbone_outputs = self.classification_backbone(
full_resnet_v1_model.inputs)
# construct FPN feature generator
self._base_fpn_max_level = min(self._fpn_max_level, 5)
......@@ -236,7 +238,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
"""
with tf.name_scope(name):
with tf.name_scope('ResnetV1FPN'):
# TODO: Add a batchnorm layer between two fc layers.
# TODO(yiming): Add a batchnorm layer between two fc layers.
feature_extractor_model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(units=1024, activation='relu'),
......@@ -283,12 +285,15 @@ class FasterRCNNResnet50FpnKerasFeatureExtractor(
fpn_min_level=fpn_min_level,
fpn_max_level=fpn_max_level,
additional_layer_depth=additional_layer_depth,
override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams)
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams
)
class FasterRCNNResnet101FpnKerasFeatureExtractor(
FasterRCNNResnetV1FpnKerasFeatureExtractor):
"""Faster RCNN with Resnet101 FPN feature extractor."""
def __init__(self,
is_training,
first_stage_features_stride=16,
......@@ -323,7 +328,8 @@ class FasterRCNNResnet101FpnKerasFeatureExtractor(
fpn_min_level=fpn_min_level,
fpn_max_level=fpn_max_level,
additional_layer_depth=additional_layer_depth,
override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams)
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams)
class FasterRCNNResnet152FpnKerasFeatureExtractor(
......@@ -364,4 +370,5 @@ class FasterRCNNResnet152FpnKerasFeatureExtractor(
fpn_min_level=fpn_min_level,
fpn_max_level=fpn_max_level,
additional_layer_depth=additional_layer_depth,
override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams)
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams)
......@@ -21,8 +21,8 @@ from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import faster_rcnn_resnet_v1_fpn_keras_feature_extractor as frcnn_res_fpn
from object_detection.utils import tf_version
from object_detection.protos import hyperparams_pb2
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
......@@ -40,7 +40,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractorTest(tf.test.TestCase):
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _build_feature_extractor(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment