"...resnet50_tensorflow.git" did not exist on "688bc4c97e3451fff546f73710b5820161f52160"
Unverified commit e6017471 authored by Vivek Rathod, committed by GitHub

Merged commit includes the following changes (#8761):



319125512  by aom:

    Internal change

--
319108395  by rathodv:

    Internal Change

--
319106259  by ronnyvotel:

    Updating input pipeline to return DensePose labels.

--

PiperOrigin-RevId: 319125512
Co-authored-by: Zhichao Lu <lzc@google.com>
parent 58d19c67
@@ -58,7 +58,8 @@ def build(input_reader_config):
         use_display_name=input_reader_config.use_display_name,
         num_additional_channels=input_reader_config.num_additional_channels,
         num_keypoints=input_reader_config.num_keypoints,
-        expand_hierarchy_labels=input_reader_config.expand_labels_hierarchy)
+        expand_hierarchy_labels=input_reader_config.expand_labels_hierarchy,
+        load_dense_pose=input_reader_config.load_dense_pose)
     return decoder
   elif input_type == input_reader_pb2.InputType.Value('TF_SEQUENCE_EXAMPLE'):
     decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder(
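The new `load_dense_pose` flag is threaded from the input reader config into the
TfExampleDecoder so that DensePose annotations are only parsed on request. A
minimal sketch of that gating pattern, with illustrative placeholder key names
rather than the decoder's actual schema:

    import tensorflow as tf

    class ExampleDecoderSketch(object):
      """Parses tf.Examples, optionally including DensePose annotations."""

      def __init__(self, load_dense_pose=False):
        # Keys that are always parsed.
        self._keys_to_features = {
            'image/encoded': tf.io.FixedLenFeature((), tf.string),
            'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
        }
        if load_dense_pose:
          # Request DensePose keys only when the config asks for them, so
          # datasets without these annotations still decode cleanly.
          self._keys_to_features.update({
              'image/object/densepose/num': tf.io.VarLenFeature(tf.int64),
              'image/object/densepose/part_index':
                  tf.io.VarLenFeature(tf.int64),
              'image/object/densepose/u': tf.io.VarLenFeature(tf.float32),
              'image/object/densepose/v': tf.io.VarLenFeature(tf.float32),
          })

      def decode(self, serialized_example):
        return tf.io.parse_single_example(serialized_example,
                                          self._keys_to_features)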
...
@@ -27,6 +27,7 @@ from object_detection.builders import model_builder
 from object_detection.builders import preprocessor_builder
 from object_detection.core import box_list
 from object_detection.core import box_list_ops
+from object_detection.core import densepose_ops
 from object_detection.core import keypoint_ops
 from object_detection.core import preprocessor
 from object_detection.core import standard_fields as fields
@@ -289,6 +290,13 @@ def transform_input_data(tensor_dict,
             out_tensor_dict[flds_gt_kpt_vis],
             keypoint_type_weight))

+  dp_surface_coords_fld = fields.InputDataFields.groundtruth_dp_surface_coords
+  if dp_surface_coords_fld in tensor_dict:
+    dp_surface_coords = out_tensor_dict[dp_surface_coords_fld]
+    realigned_dp_surface_coords = densepose_ops.change_coordinate_frame(
+        dp_surface_coords, im_box)
+    out_tensor_dict[dp_surface_coords_fld] = realigned_dp_surface_coords
+
   if use_bfloat16:
     preprocessed_resized_image = tf.cast(
         preprocessed_resized_image, tf.bfloat16)
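This block re-expresses the DensePose surface coordinates in the frame of
`im_box`, mirroring what transform_input_data already does for boxes and
keypoints. A rough sketch of the semantics of
`densepose_ops.change_coordinate_frame`, assuming surface coordinates are
[num_instances, num_points, 4] in (y, x, v, u) format and the window is
[ymin, xmin, ymax, xmax] (an approximation for illustration, not the library
function itself):

    import tensorflow as tf

    def change_coordinate_frame_sketch(dp_surface_coords, window):
      ymin, xmin, ymax, xmax = tf.unstack(window)
      yx = dp_surface_coords[:, :, :2]  # image-relative (y, x)
      vu = dp_surface_coords[:, :, 2:]  # part-relative (v, u)
      # Re-express (y, x) relative to the window's corner and extent. The
      # (v, u) surface coordinates live on the body part, not the image,
      # so they are deliberately left untouched.
      shifted = yx - tf.stack([ymin, xmin])
      scaled = shifted / tf.stack([ymax - ymin, xmax - xmin])
      return tf.concat([scaled, vu], axis=2)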
@@ -355,7 +363,8 @@ def pad_input_data_to_static_shapes(tensor_dict,
                                     num_classes,
                                     spatial_image_shape=None,
                                     max_num_context_features=None,
-                                    context_feature_length=None):
+                                    context_feature_length=None,
+                                    max_dp_points=336):
   """Pads input tensors to static shapes.

   In case num_additional_channels > 0, we assume that the additional channels
@@ -372,6 +381,11 @@ def pad_input_data_to_static_shapes(tensor_dict,
     max_num_context_features (optional): The maximum number of context
       features needed to compute shapes padding.
     context_feature_length (optional): The length of the context feature.
+    max_dp_points (optional): The maximum number of DensePose sampled points
+      per instance. The default (336) is selected since the original DensePose
+      paper (https://arxiv.org/pdf/1802.00434.pdf) indicates that the maximum
+      number of samples per part is 14, and therefore 24 * 14 = 336 is the
+      maximum number of samples per instance.

   Returns:
     A dictionary keyed by fields.InputDataFields containing padding shapes for
@@ -476,6 +490,15 @@ def pad_input_data_to_static_shapes(tensor_dict,
   padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])]
   padding_shapes[fields.InputDataFields.
                  groundtruth_keypoint_weights] = padding_shape
+  if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
+    padding_shapes[
+        fields.InputDataFields.groundtruth_dp_num_points] = [max_num_boxes]
+    padding_shapes[
+        fields.InputDataFields.groundtruth_dp_part_ids] = [
+            max_num_boxes, max_dp_points]
+    padding_shapes[
+        fields.InputDataFields.groundtruth_dp_surface_coords] = [
+            max_num_boxes, max_dp_points, 4]

   # Prepare for ContextRCNN related fields.
   if fields.InputDataFields.context_features in tensor_dict:
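For illustration, padding to these static shapes amounts to zero-padding each
DensePose tensor up to the computed shape. A minimal sketch of that idea (the
library's own shape utilities also clip tensors that exceed the target shape,
which this sketch does not):

    import tensorflow as tf

    def pad_to_static_shape(tensor, padding_shape):
      """Zero-pads `tensor` up to `padding_shape` in every dimension."""
      current_shape = tf.shape(tensor)
      trailing = tf.constant(padding_shape, dtype=tf.int32) - current_shape
      paddings = tf.stack([tf.zeros_like(current_shape), trailing], axis=1)
      padded = tf.pad(tensor, paddings)
      padded.set_shape(padding_shape)  # Make the static shape known to TF.
      return padded

    part_ids = tf.constant([[4, 23]], dtype=tf.int32)  # one box, two points
    print(pad_to_static_shape(part_ids, [3, 336]).shape)  # (3, 336)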
@@ -535,6 +558,10 @@ def augment_input_data(tensor_dict, data_augmentation_options):
                                          in tensor_dict)
   include_multiclass_scores = (fields.InputDataFields.multiclass_scores in
                                tensor_dict)
+  dense_pose_fields = [fields.InputDataFields.groundtruth_dp_num_points,
+                       fields.InputDataFields.groundtruth_dp_part_ids,
+                       fields.InputDataFields.groundtruth_dp_surface_coords]
+  include_dense_pose = all(field in tensor_dict for field in dense_pose_fields)
   tensor_dict = preprocessor.preprocess(
       tensor_dict, data_augmentation_options,
       func_arg_map=preprocessor.get_default_func_arg_map(
@@ -543,7 +570,8 @@ def augment_input_data(tensor_dict, data_augmentation_options):
           include_multiclass_scores=include_multiclass_scores,
           include_instance_masks=include_instance_masks,
           include_keypoints=include_keypoints,
-          include_keypoint_visibilities=include_keypoint_visibilities))
+          include_keypoint_visibilities=include_keypoint_visibilities,
+          include_dense_pose=include_dense_pose))
   tensor_dict[fields.InputDataFields.image] = tf.squeeze(
       tensor_dict[fields.InputDataFields.image], axis=0)
   return tensor_dict
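The augmentation plumbing follows the existing "include flag" pattern:
DensePose tensors are forwarded to augmentation functions only when all three
fields are present. A schematic sketch of that pattern (names illustrative;
the real mapping lives in preprocessor.get_default_func_arg_map):

    def build_func_arg_map(include_keypoints=False, include_dense_pose=False):
      # Each augmentation maps to the tuple of tensor keys it consumes;
      # optional keys are appended only when their flag is set.
      groundtruth_args = ['image', 'groundtruth_boxes', 'groundtruth_classes']
      if include_keypoints:
        groundtruth_args.append('groundtruth_keypoints')
      if include_dense_pose:
        groundtruth_args += ['groundtruth_dp_num_points',
                             'groundtruth_dp_part_ids',
                             'groundtruth_dp_surface_coords']
      return {'random_horizontal_flip': tuple(groundtruth_args)}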
@@ -572,6 +600,9 @@ def _get_labels_dict(input_dict):
       fields.InputDataFields.groundtruth_difficult,
       fields.InputDataFields.groundtruth_keypoint_visibilities,
       fields.InputDataFields.groundtruth_keypoint_weights,
+      fields.InputDataFields.groundtruth_dp_num_points,
+      fields.InputDataFields.groundtruth_dp_part_ids,
+      fields.InputDataFields.groundtruth_dp_surface_coords
   ]

   for key in optional_label_keys:
@@ -720,6 +751,17 @@ def train_input(train_config, train_input_config,
       groundtruth visibilities for each keypoint.
     labels[fields.InputDataFields.groundtruth_labeled_classes] is a
       [batch_size, num_classes] float32 k-hot tensor of classes.
+    labels[fields.InputDataFields.groundtruth_dp_num_points] is a
+      [batch_size, num_boxes] int32 tensor with the number of sampled
+      DensePose points per object.
+    labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
+      [batch_size, num_boxes, max_sampled_points] int32 tensor with the
+      DensePose part ids (0-indexed) per object.
+    labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
+      [batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
+      DensePose surface coordinates. The format is (y, x, v, u), where (y, x)
+      are normalized image coordinates and (v, u) are normalized surface part
+      coordinates.

   Raises:
     TypeError: if the `train_config`, `train_input_config` or `model_config`
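To make the documented (y, x, v, u) layout concrete, here is an illustrative
construction of these label tensors with arbitrary values (mirroring the unit
test added further down):

    import numpy as np

    batch_size, num_boxes, max_sampled_points = 1, 2, 2
    # Number of sampled DensePose points per box; box 0 has none.
    dp_num_points = np.array([[0, 2]], dtype=np.int32)
    # Part ids are 0-indexed over the 24 DensePose body parts (0..23).
    dp_part_ids = np.array([[[0, 0], [4, 23]]], dtype=np.int32)
    dp_surface_coords = np.zeros(
        (batch_size, num_boxes, max_sampled_points, 4), dtype=np.float32)
    # Point 0 of box 1 sits at normalized image location (y=0.1, x=0.2)
    # with normalized surface-part coordinates (v=0.3, u=0.4).
    dp_surface_coords[0, 1, 0] = [0.1, 0.2, 0.3, 0.4]
    dp_surface_coords[0, 1, 1] = [0.6, 0.8, 0.6, 0.7]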
@@ -861,6 +903,17 @@ def eval_input(eval_config, eval_input_config, model_config,
       same class which heavily occlude each other.
     labels[fields.InputDataFields.groundtruth_labeled_classes] is a
       [num_boxes, num_classes] float32 k-hot tensor of classes.
+    labels[fields.InputDataFields.groundtruth_dp_num_points] is a
+      [batch_size, num_boxes] int32 tensor with the number of sampled
+      DensePose points per object.
+    labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
+      [batch_size, num_boxes, max_sampled_points] int32 tensor with the
+      DensePose part ids (0-indexed) per object.
+    labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
+      [batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
+      DensePose surface coordinates. The format is (y, x, v, u), where (y, x)
+      are normalized image coordinates and (v, u) are normalized surface part
+      coordinates.

   Raises:
     TypeError: if the `eval_config`, `eval_input_config` or `model_config`
...
@@ -1293,6 +1293,51 @@ class DataTransformationFnTest(test_case.TestCase, parameterized.TestCase):
         groundtruth_keypoint_weights,
         [[1.0, 1.0], [1.0, 1.0]])

+  def test_groundtruth_dense_pose(self):
+    def graph_fn():
+      tensor_dict = {
+          fields.InputDataFields.image:
+              tf.constant(np.random.rand(100, 50, 3).astype(np.float32)),
+          fields.InputDataFields.groundtruth_boxes:
+              tf.constant(np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]],
+                                   np.float32)),
+          fields.InputDataFields.groundtruth_classes:
+              tf.constant(np.array([1, 2], np.int32)),
+          fields.InputDataFields.groundtruth_dp_num_points:
+              tf.constant([0, 2], dtype=tf.int32),
+          fields.InputDataFields.groundtruth_dp_part_ids:
+              tf.constant([[0, 0], [4, 23]], dtype=tf.int32),
+          fields.InputDataFields.groundtruth_dp_surface_coords:
+              tf.constant([[[0., 0., 0., 0.], [0., 0., 0., 0.]],
+                           [[0.1, 0.2, 0.3, 0.4], [0.6, 0.8, 0.6, 0.7]]],
+                          dtype=tf.float32),
+      }
+      num_classes = 1
+      input_transformation_fn = functools.partial(
+          inputs.transform_input_data,
+          model_preprocess_fn=_fake_resize50_preprocess_fn,
+          image_resizer_fn=_fake_image_resizer_fn,
+          num_classes=num_classes)
+      transformed_inputs = input_transformation_fn(tensor_dict=tensor_dict)
+      transformed_dp_num_points = transformed_inputs[
+          fields.InputDataFields.groundtruth_dp_num_points]
+      transformed_dp_part_ids = transformed_inputs[
+          fields.InputDataFields.groundtruth_dp_part_ids]
+      transformed_dp_surface_coords = transformed_inputs[
+          fields.InputDataFields.groundtruth_dp_surface_coords]
+      return (transformed_dp_num_points, transformed_dp_part_ids,
+              transformed_dp_surface_coords)
+
+    dp_num_points, dp_part_ids, dp_surface_coords = self.execute_cpu(
+        graph_fn, [])
+    self.assertAllEqual(dp_num_points, [0, 2])
+    self.assertAllEqual(dp_part_ids, [[0, 0], [4, 23]])
+    self.assertAllClose(
+        dp_surface_coords,
+        [[[0., 0., 0., 0.], [0., 0., 0., 0.]],
+         [[0.1, 0.1, 0.3, 0.4], [0.6, 0.4, 0.6, 0.7]]])
+

 class PadInputDataToStaticShapesFnTest(test_case.TestCase):
@@ -1454,6 +1499,35 @@ class PadInputDataToStaticShapesFnTest(test_case.TestCase):
             fields.InputDataFields.groundtruth_keypoint_visibilities]
         .shape.as_list(), [3, 16])

+  def test_dense_pose(self):
+    input_tensor_dict = {
+        fields.InputDataFields.groundtruth_dp_num_points:
+            tf.constant([0, 2], dtype=tf.int32),
+        fields.InputDataFields.groundtruth_dp_part_ids:
+            tf.constant([[0, 0], [4, 23]], dtype=tf.int32),
+        fields.InputDataFields.groundtruth_dp_surface_coords:
+            tf.constant([[[0., 0., 0., 0.], [0., 0., 0., 0.]],
+                         [[0.1, 0.2, 0.3, 0.4], [0.6, 0.8, 0.6, 0.7]]],
+                        dtype=tf.float32),
+    }
+    padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
+        tensor_dict=input_tensor_dict,
+        max_num_boxes=3,
+        num_classes=1,
+        spatial_image_shape=[128, 128],
+        max_dp_points=200)
+
+    self.assertAllEqual(
+        padded_tensor_dict[fields.InputDataFields.groundtruth_dp_num_points]
+        .shape.as_list(), [3])
+    self.assertAllEqual(
+        padded_tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids]
+        .shape.as_list(), [3, 200])
+    self.assertAllEqual(
+        padded_tensor_dict[fields.InputDataFields.groundtruth_dp_surface_coords]
+        .shape.as_list(), [3, 200, 4])
+
   def test_context_features(self):
     context_memory_size = 8
     context_feature_length = 10
...
@@ -479,12 +479,9 @@ class SSDMetaArch(model.DetectionModel):
       ValueError: if inputs tensor does not have type tf.float32
     """
     with tf.name_scope('Preprocessor'):
-      (resized_inputs,
-       true_image_shapes) = shape_utils.resize_images_and_return_shapes(
-           inputs, self._image_resizer_fn)
-      return (self._feature_extractor.preprocess(resized_inputs),
-              true_image_shapes)
+      normalized_inputs = self._feature_extractor.preprocess(inputs)
+      return shape_utils.resize_images_and_return_shapes(
+          normalized_inputs, self._image_resizer_fn)

   def _compute_clip_window(self, preprocessed_images, true_image_shapes):
     """Computes clip window to use during post_processing.
...